[pnfs] [PATCH 1/2] Slot Table Implementation

Benny Halevy bhalevy at panasas.com
Sat Mar 17 06:54:11 EDT 2007


Rahul, this is not urgent but I really think we should move to a single
queue for several reason.  With your implementation when all slots
are used you have to do max_slots test_and_set operations to
discover that.  This will just not scale.   I agree with your point
about semaphores and renegotiating the number of slots so how
about this (sketch) instead:

nfs41_proc_sequence_done()
{
	int need_wakeup = 0;

	spin_lock(&res->channel->channel_lock);
	list_add(&res->slot->slot_list, &res->channel->unused_slots);
	spin_unlock(&res->channel->channel_lock);

	/* Wake up the threads waiting on this session */
	rpc_wake_up(&res->channel->sl_waitq);

 	return status;
 }

struct nfs4_slot *nfs4_find_slot(struct nfs4_channel *channel)
{
	struct nfs4_slot *slot;
	
	might_sleep();

	for (;;) {
		int ret = wait_event_interruptible(&channel->sl_waitq,
		                                   !list_empty(&channel->unused_slots));
		if (unlikely(ret) {
			BUG_ON(ret != -ERESTARTSYS);
			break;
		}

		spin_lock(&channel->channel_lock);
		if (unlikely(list_empty(&channel->unused_slots))) {
			spin_unlock(&channel->channel_lock);
			continue;
		}
		slot = list_entry(channel->unused_slots.next,
		                  struct nfs4_slot,
		                  slot_list);
                list_del(&slot->slot_list);
		spin_unlock(&channel->channel_lock);
		return slot;
	}

	return NULL;
}


Iyer, Rahul wrote:

>Hi Benny,
>A comment inline...
>Regards
>Rahul
>
>
>  
>
>>-----Original Message-----
>>From: Benny Halevy [mailto:bhalevy at panasas.com] 
>>Sent: Friday, March 16, 2007 2:24 AM
>>To: Iyer, Rahul
>>Cc: pnfs at linux-nfs.org
>>Subject: Re: [pnfs] [PATCH 1/2] Slot Table Implementation
>>
>>Rahul, my comments below...
>>
>>Benny
>>
>>iyer at netapp.com wrote:
>>    
>>
>>>From: iyer <iyer at netapp.com>
>>>
>>>Added code to implement a slot table.
>>>
>>>Signed-off-by: iyer <iyer at netapp.com>
>>>---
>>> fs/nfs/nfs41_sessions.h        |   62 --------
>>> fs/nfs/nfs4_fs.h               |    2 +-
>>> fs/nfs/nfs4proc.c              |  326 
>>>      
>>>
>>+++++++++++++++++++++++++++++++++++-----
>>    
>>
>>> fs/nfs/nfs4xdr.c               |   24 ++--
>>> fs/nfs/super.c                 |    6 +-
>>> include/linux/nfs41_sessions.h |   72 +++++++++
>>> include/linux/nfs_xdr.h        |    4 +
>>> include/linux/nfsd/state.h     |    2 -
>>> 8 files changed, 378 insertions(+), 120 deletions(-)  delete mode 
>>>100644 fs/nfs/nfs41_sessions.h  create mode 100644 
>>>include/linux/nfs41_sessions.h
>>>
>>>diff --git a/fs/nfs/nfs41_sessions.h 
>>>      
>>>
>>b/fs/nfs/nfs41_sessions.h deleted 
>>    
>>
>>>file mode 100644 index fc658c5..0000000
>>>--- a/fs/nfs/nfs41_sessions.h
>>>+++ /dev/null
>>>@@ -1,62 +0,0 @@
>>>-#ifndef __NFS4_1_SESSIONS_H__
>>>-#define __NFS4_1_SESSIONS_H__
>>>-
>>>-typedef unsigned char		sessionid_t[16];
>>>-typedef u32			streamchannel_attrs;
>>>-typedef u32			rdmachannel_attrs;
>>>-
>>>-struct nfs4_channel_attrs {
>>>-	unsigned long		max_rqst_sz;
>>>-	unsigned long		max_resp_sz;
>>>-	unsigned long		max_resp_sz_cached;
>>>-	unsigned long		max_ops;
>>>-	unsigned long		max_reqs;
>>>-	streamchannel_attrs	stream_attrs;
>>>-	rdmachannel_attrs	rdma_attrs;
>>>-};
>>>-
>>>-struct nfs4_channel {
>>>-	struct nfs4_channel_attrs	chan_attrs;
>>>-	unsigned long			nr_conns;
>>>-	struct list_head		rpc_clients;
>>>-};
>>>-
>>>-struct nfs4_session {
>>>-	/* Session related params */
>>>-	sessionid_t		sess_id;
>>>-	u32			seqid;  /* The seqid returned 
>>>      
>>>
>>by exchange_id */
>>    
>>
>>>-	u32			persist;
>>>-	u32			header_padding;
>>>-	u32			hash_alg;
>>>-	u32			ssv_len;
>>>-	u32			use_for_back_chan;
>>>-	u32			rdma_mode;
>>>-
>>>-	/* Slotid management */
>>>-	unsigned long		nr_slots_in_use;
>>>-	struct list_head	slots_in_use;
>>>-	struct list_head	unused_slots;
>>>-	struct rpc_wait_queue	slot_waitq;
>>>-
>>>-	/* The fore and back channel */
>>>-	struct nfs4_channel	fore_channel;
>>>-	struct nfs4_channel	back_channel;
>>>-
>>>-	unsigned int		expired;
>>>-	struct nfs4_client *	client;
>>>-	struct list_head	session_hashtbl;
>>>-	spinlock_t		session_lock;
>>>-	/* To prevent races between create_session and sequence */
>>>-	int			mutating;
>>>-	struct semaphore	session_sem;
>>>-	atomic_t	ref_count;
>>>-};
>>>-
>>>-struct nfs4_slot {
>>>-	u32			slot_nr;
>>>-	u32			seq_nr;
>>>-	struct nfs4_session *	session;
>>>-	struct list_head	slot_list;
>>>-};
>>>-
>>>-#endif
>>>diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 
>>>195d757..e50657e 100644
>>>--- a/fs/nfs/nfs4_fs.h
>>>+++ b/fs/nfs/nfs4_fs.h
>>>@@ -9,7 +9,7 @@
>>> #ifndef __LINUX_FS_NFS_NFS4_FS_H
>>> #define __LINUX_FS_NFS_NFS4_FS_H
>>> 
>>>-#include "nfs41_sessions.h"
>>>+#include <linux/nfs41_sessions.h>
>>> 
>>> #ifdef CONFIG_NFS_V4
>>> #define NFSV4_MAX_MINORVERSION 1
>>>diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 
>>>167ff4a..28513a1 100644
>>>--- a/fs/nfs/nfs4proc.c
>>>+++ b/fs/nfs/nfs4proc.c
>>>@@ -49,6 +49,7 @@
>>> #include <linux/namei.h>
>>> #include <linux/mount.h>
>>> #include <linux/module.h>
>>>+#include <linux/bitops.h>
>>> 
>>> #include "nfs4_fs.h"
>>> #include "delegation.h"
>>>@@ -221,8 +222,10 @@ static int 
>>>      
>>>
>>nfs41_proc_sequence_done(struct nfs4_session *session, struct nfs41_s
>>    
>>
>>> 	unsigned long timestamp;
>>> 	struct nfs4_client *clp;
>>> 
>>>-	if (!session || !(clp = session->client))
>>>-		return 0;
>>>+	if (!session || !(clp = session->client)) {
>>>+		printk(KERN_EMERG "%s is NULL!!!\n", 
>>>      
>>>
>>(!session)?"session":"clp");
>>    
>>
>>>+		BUG();
>>>+	}
>>>      
>>>
>>For performance reasons the defensive code should only be 
>>active with kernel debugging.  The conditional statement 
>>above is always executed needlessly and is not as clean as it 
>>could be because of the assignment embedded in the boolean 
>>expression.  Therefore, this better be coded like this:
>>
>>	BUG_ON(!session);
>>	clp = session->client;
>>	BUG_ON(!clp);
>>
>>    
>>
>>> 
>>> 	if (!status) {
>>> 		timestamp = jiffies;
>>>@@ -233,17 +236,131 @@ static int 
>>>      
>>>
>>nfs41_proc_sequence_done(struct nfs4_session *session, struct nfs41_s
>>    
>>
>>> 		spin_unlock(&clp->cl_lock);
>>> 	}
>>> 
>>>+	/* Clear the 'busy' bit on the slot that was used */
>>>+	smp_mb__before_clear_bit();
>>>+	clear_bit(NFS4_SLOT_BUSY, &res->slot->flags);
>>>+	smp_mb__after_clear_bit();
>>>+
>>>+	printk(KERN_EMERG "waking up waiters on slot %d\n", 
>>>+res->slot->slot_nr);
>>>+
>>>+	/* Wake up the threads waiting on this slot */
>>>+	wake_up_bit(&res->slot->flags, NFS4_SLOT_BUSY);
>>>+
>>> 	return status;
>>> }
>>> 
>>>+static int nfs4_wait_bit_interruptible(void *word) {
>>>+	if (signal_pending(current))
>>>+		return -ERESTARTSYS;
>>>+	schedule();
>>>+	return 0;
>>>+}
>>>+
>>>+/* Find the lowest numbered slot or sleep on the least 
>>>      
>>>
>>loaded slot */ 
>>    
>>
>>>+struct nfs4_slot *nfs4_find_slot(struct nfs4_channel *channel) {
>>>+	struct nfs4_slot_table *tbl;
>>>+	struct nfs4_slot *slot;
>>>+	struct nfs4_slot *target_slot;
>>>+	u32 max_slots;
>>>+	u32 min_waiters;
>>>+	int i;
>>>+	int need_to_sleep;
>>>+	
>>>+	might_sleep();
>>>+
>>>+	tbl = &channel->slot_table;
>>>+	min_waiters = tbl->slots[0].nr_waiters;
>>>      
>>>
>>atomic_read()
>>
>>    
>>
>>>+	target_slot = &tbl->slots[0];
>>>+
>>>+	spin_lock(&tbl->slot_tbl_lock);
>>>+	/* Make a local copy of max slots so we don't need to 
>>>      
>>>
>>hold it through
>>    
>>
>>>+	 * out. 
>>>+	 * XXX Will need to revalidate this if slots are reclaimed
>>>+	 */
>>>+	max_slots = tbl->max_slots;
>>>+	spin_unlock(&tbl->slot_tbl_lock);
>>>      
>>>
>>Using a spinlock here is unreasonably expensive when you 
>>could use atomic counters since the lock overhead is not 
>>amortized over multiple accesses to the structure.
>>max_slots and nr_waiters should be defined as atomic_t and 
>>atomic operations be used to access them; Alternatively use 
>>one spinlock for the whole channel around the outer loop
>>
>>    
>>
>>>+
>>>+	do {
>>>+		need_to_sleep = 1;
>>>+		for (i = 0; i <  max_slots; ++i) {
>>>+			slot = &tbl->slots[i];
>>>+			if (!test_and_set_bit(NFS4_SLOT_BUSY, 
>>>      
>>>
>>&slot->flags)){
>>    
>>
>>>+				/* We found an empty slot */
>>>+				target_slot = slot;
>>>+				need_to_sleep = 0;
>>>+				break;
>>>+			}
>>>+
>>>+			spin_lock(&slot->slot_lock);
>>>+			if (min_waiters > slot->nr_waiters) {
>>>+				min_waiters = slot->nr_waiters;
>>>+				target_slot = slot;
>>>+			}
>>>+			spin_unlock(&slot->slot_lock);
>>>      
>>>
>>why lock? use atomic_read(&slot->nr_waiters) also, if several 
>>threads go through this code in parallel they will pick the 
>>same target_slot so you want to increment.
>>That's another reason to use a bigger lock.
>>    
>>
>
>Using atomic_t is a bit hard when you have to do compare and set kind of
>operations...
>Like for instance:
>
>If (min_waiters > slot->nr_waiters) {
>	min_waiters = slot->nr_waiters;
>	target_slot = slot;
>}
>
>Doing the above atomically is hard with atomic variables only because
>the whole sequence needs to be atomic. However, I can change the
>spin_lock used to rwlock_t. This should better solve the problem. Right?
>
>  
>
>>>+			printk(KERN_EMERG "slot %d has busy bit 
>>>      
>>>
>>%s and nr_waiters is %u\n"
>>    
>>
>>>+					, slot->slot_nr, 
>>>+					(need_to_sleep)?"set":"unset", 
>>>+					slot->nr_waiters);
>>>+		}
>>>+	
>>>+		/* Check whether we need to sleep. If so, sleep 
>>>      
>>>
>>on the BUSY bit 
>>    
>>
>>>+		 * Increment the nr_waiters before sleeping and 
>>>      
>>>
>>decrement it 
>>    
>>
>>>+		 * when woken up
>>>+		 */
>>>+		if (need_to_sleep) {
>>>+			printk(KERN_EMERG "sleeping on slot %d; 
>>>      
>>>
>>seq_nr: %d\n",
>>    
>>
>>>+					target_slot->slot_nr, 
>>>+					target_slot->seq_nr);
>>>+			spin_lock(&target_slot->slot_lock);
>>>+			++target_slot->nr_waiters;
>>>+			spin_unlock(&target_slot->slot_lock);
>>>      
>>>
>>atomic_inc()
>>
>>    
>>
>>>+	
>>>+			/* XXX: We need to check the return value of
>>>+			 * eait_on_bit so that we can check if it's
>>>+			 * interrupted.
>>>+			 */
>>>+			wait_on_bit(&target_slot->flags, 
>>>      
>>>
>>NFS4_SLOT_BUSY, 
>>    
>>
>>>+					nfs4_wait_bit_interruptible,
>>>+					TASK_INTERRUPTIBLE);
>>>+
>>>+			spin_lock(&target_slot->slot_lock);
>>>+			--target_slot->nr_waiters;
>>>+			spin_unlock(&target_slot->slot_lock);
>>>      
>>>
>>atomic_dec()
>>
>>But wait a second, why do you want a queue per slot to begin with?
>>It seems like you can just go with a semaphore initialized to 
>>the number of slots, do down_interruptible before scanning 
>>for a free slot and up in nfs41_proc_sequence_done. Take a 
>>spin lock to scan the slot table (last index can be saved and 
>>rotated for uniformity) and if not free slot is found do a 
>>BUG() since the semaphore should prevent that from happening;
>>
>>
>>    
>>
>>>+		}
>>>+		else
>>>+			break;
>>>+	}while (test_and_set_bit(NFS4_SLOT_BUSY, &target_slot->flags));
>>>+
>>>+	printk(KERN_EMERG "slot id: %u\nseqid: %u\n max_slots: %u\n", 
>>>+			target_slot->slot_nr, target_slot->seq_nr, 
>>>+				max_slots);
>>>+	
>>>+	return target_slot;
>>>+}
>>>+	
>>> static int _nfs41_proc_setup_sequence(struct nfs4_session 
>>>      
>>>
>>*session, 
>>    
>>
>>>struct nfs41_sequence_args *args, struct nfs41_sequence_res *res)  {
>>> 	u32 *ptr;
>>>-
>>>+	struct nfs4_slot *slot;
>>>+	
>>> 	ptr = (u32 *)session->sess_id;
>>> 	dprintk("%s: %u:%u:%u:%u\n", __FUNCTION__, ptr[0], 
>>>      
>>>
>>ptr[1], ptr[2], 
>>    
>>
>>>ptr[3]);
>>> 
>>>-	memcpy(args->sessionid, (unsigned char 
>>>      
>>>
>>*)session->sess_id, NFS4_MAX_SESSIONID_LEN);
>>    
>>
>>>+	memcpy(args->sessionid, (unsigned char *)session->sess_id, 
>>>+					NFS4_MAX_SESSIONID_LEN);
>>>+
>>>+       slot = nfs4_find_slot(&session->fore_channel);
>>>+
>>>+       /* XXX: Do we always increment this? Are there any 
>>>      
>>>
>>errors for which we
>>    
>>
>>>+	* don't increment the sequence number?
>>>+	*/
>>>+       args->seqid = slot->seq_nr++;
>>>+       args->slotid = slot->slot_nr;
>>>+       args->maxslots = session->fore_channel.slot_table.max_slots;
>>>+      
>>>+       res->slot = slot;
>>> 
>>> 	return 0;
>>> }
>>>@@ -1685,11 +1802,6 @@ static int nfs4_proc_get_root(struct 
>>>      
>>>
>>nfs_server *server, struct nfs_fh *fhandle,
>>    
>>
>>> 	};
>>> 	int status;
>>> 
>>>-	if (server->rpc_ops->setup_sequence && (status =
>>>-	    
>>>      
>>>
>>server->rpc_ops->setup_sequence(server->nfs4_state->cl_session,
>>    
>>
>>>-	    &seqargs, &seqres)))
>>>-		return status;
>>>-
>>> 	/*
>>> 	 * Now we do a separate LOOKUP for each component of 
>>>      
>>>
>>the mount path.
>>    
>>
>>> 	 * The LOOKUPs are done separately so that we can 
>>>      
>>>
>>conveniently @@ 
>>    
>>
>>>-1714,9 +1826,21 @@ static int nfs4_proc_get_root(struct nfs_server 
>>>*server, struct nfs_fh *fhandle,
>>> 
>>> 		do {
>>> 			nfs_fattr_init(fattr);
>>>+        		if (server->rpc_ops->setup_sequence && 
>>>      
>>>
>>(status = 
>>    
>>
>>>+				server->rpc_ops->setup_sequence(
>>>+				server->nfs4_state->cl_session, 
>>>+				&seqargs, &seqres)))
>>>+	                        return status;
>>>+			
>>> 			status = nfs4_handle_exception(server,
>>> 					
>>>      
>>>
>>rpc_call_sync(server->client, &msg, 0),
>>    
>>
>>> 					&exception);
>>>+
>>>+        		if (server->rpc_ops->sequence_done)
>>>+		                server->rpc_ops->sequence_done(
>>>+						
>>>      
>>>
>>server->nfs4_state->cl_session, 
>>    
>>
>>>+						&seqres, status);
>>>+
>>> 		} while (exception.retry);
>>> 		if (status == 0)
>>> 			continue;
>>>@@ -1731,10 +1855,6 @@ static int nfs4_proc_get_root(struct 
>>>      
>>>
>>nfs_server *server, struct nfs_fh *fhandle,
>>    
>>
>>> 	if (status == 0)
>>> 		status = nfs4_do_fsinfo(server, fhandle, info);
>>> out:
>>>-	if (server->rpc_ops->sequence_done)
>>>-		
>>>      
>>>
>>server->rpc_ops->sequence_done(server->nfs4_state->cl_session,
>>    
>>
>>>-						&seqres, status);
>>>-
>>> 	return nfs4_map_errors(status);
>>> }
>>> 
>>>@@ -3268,6 +3388,7 @@ int nfs4_proc_async_sequence(struct 
>>>      
>>>
>>nfs4_client *clp, struct rpc_cred *cred)
>>    
>>
>>> 	ret = rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
>>> 	&nfs4_sequence_ops, (void *)jiffies);
>>> 
>>>+        nfs41_proc_sequence_done(clp->cl_session, res, ret);
>>> out:
>>> 	return ret;
>>> out_free:
>>>@@ -3581,14 +3702,6 @@ nfs4_async_handle_error(struct 
>>>      
>>>
>>rpc_task *task, const struct nfs_server *server)
>>    
>>
>>> 	return 0;
>>> }
>>> 
>>>-static int nfs4_wait_bit_interruptible(void *word) -{
>>>-	if (signal_pending(current))
>>>-		return -ERESTARTSYS;
>>>-	schedule();
>>>-	return 0;
>>>-}
>>>-
>>> static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct 
>>>nfs4_client *clp)  {
>>> 	sigset_t oldset;
>>>@@ -3823,6 +3936,111 @@ int nfs4_proc_get_lease_time(struct 
>>>      
>>>
>>nfs4_client *clp, struct nfs_fsinfo *fsinfo)
>>    
>>
>>> 	return status;
>>> }
>>> 
>>>+/* Initialize a slot table */
>>>+int nfs4_init_slot_table(struct nfs4_channel *channel) {
>>>+	int i;
>>>+	struct nfs4_slot_table *tbl;
>>>+	struct nfs4_slot *slot;
>>>+	
>>>+	tbl = &channel->slot_table;
>>>+	tbl->max_slots = channel->chan_attrs.max_reqs;
>>>+
>>>+	tbl->slots = kzalloc(tbl->max_slots * sizeof(struct 
>>>      
>>>
>>nfs4_slot), GFP_ATOMIC);
>>    
>>
>>>+	if (!tbl->slots)
>>>+		return -ENOMEM;
>>>+
>>>+	spin_lock_init(&tbl->slot_tbl_lock);
>>>+
>>>+	for (i = 0; i < tbl->max_slots; ++i) {
>>>+		slot = &tbl->slots[i];
>>>+
>>>+		slot->slot_nr = i;
>>>+		slot->seq_nr = 1;
>>>+		slot->flags = 0;
>>>+		slot->nr_waiters = 0;
>>>+		spin_lock_init(&slot->slot_lock);
>>>+	}
>>>+
>>>+	return 0;
>>>+}
>>>+
>>>+/* Destroy the slot table */
>>>+void nfs4_destroy_slot_table(struct nfs4_channel *channel) {
>>>+	int i;
>>>+	struct nfs4_slot *slot;
>>>+	struct nfs4_slot_table *tbl;
>>>+
>>>+	tbl = &channel->slot_table;
>>>+	
>>>+	for (i = 0; i < tbl->max_slots;++i) {
>>>+		slot = &tbl->slots[i];
>>>+
>>>+		if (slot->nr_waiters)
>>>+			BUG();
>>>+	}
>>>+
>>>+	kfree(channel->slot_table.slots);
>>>+	channel->slot_table.slots = NULL;
>>>+
>>>+	return;
>>>+}
>>>+
>>>+/* dump the channel attributes */
>>>+void nfs4_dump_channel_attrs(struct nfs4_channel_attrs *attrs) {
>>>+	printk(KERN_INFO "max_rqst_sz: %u\n", attrs->max_rqst_sz);
>>>+	printk(KERN_INFO "max_resp_sz: %u\n", attrs->max_resp_sz);
>>>+	printk(KERN_INFO "max_resp_sz_cached: %u\n", 
>>>      
>>>
>>attrs->max_resp_sz_cached);
>>    
>>
>>>+	printk(KERN_INFO "max_ops: %u\n", attrs->max_ops);
>>>+	printk(KERN_INFO "max_reqs: %u\n", attrs->max_reqs); }
>>>+
>>>+/* Initialize the values to be used by the client in 
>>>      
>>>
>>CREATE_SESSION 
>>    
>>
>>>+*/ void nfs4_init_channel_attrs(struct nfs4_client *clp,
>>>+				struct nfs4_channel_attrs *fc_attrs,
>>>+				struct nfs4_channel_attrs *bc_attrs) {
>>>+	/* XXX: We need to have good values here... 32K is a 
>>>      
>>>
>>wild guess */
>>    
>>
>>>+	fc_attrs->max_rqst_sz = bc_attrs->max_rqst_sz = 32768;
>>>+	fc_attrs->max_resp_sz = bc_attrs->max_resp_sz = 32768;
>>>+	fc_attrs->max_resp_sz_cached = 
>>>      
>>>
>>bc_attrs->max_resp_sz_cached = 32768;
>>    
>>
>>>+	fc_attrs->max_ops = bc_attrs->max_ops = 0xFFFFFFFF;
>>>+	fc_attrs->max_reqs = bc_attrs->max_reqs = 
>>>+				clp->cl_rpcclient->cl_xprt->max_reqs;
>>>+	fc_attrs->stream_attrs = bc_attrs->stream_attrs = 0;
>>>+	fc_attrs->rdma_attrs = bc_attrs->rdma_attrs = 0;
>>>+
>>>+}
>>>+
>>>+/* Check the values returned by the server for 
>>>      
>>>
>>CREATE_SESSION. Since 
>>    
>>
>>>+we made
>>>+ * our needs known, if the server gives us more than we need, we 
>>>+don't bother
>>>+ * with it.
>>>+ */
>>>+void nfs4_adjust_channel_attrs(struct nfs4_channel_attrs 
>>>      
>>>
>>*req_attrs,
>>    
>>
>>>+				struct nfs4_channel_attrs *resp_attrs) {
>>>+	if (req_attrs->max_rqst_sz < resp_attrs->max_rqst_sz)
>>>+		resp_attrs->max_rqst_sz = req_attrs->max_rqst_sz;
>>>+
>>>+	if (req_attrs->max_resp_sz < resp_attrs->max_resp_sz)
>>>+		resp_attrs->max_resp_sz = req_attrs->max_resp_sz;
>>>+
>>>+	if (req_attrs->max_resp_sz_cached < resp_attrs->max_resp_sz)
>>>+		resp_attrs->max_resp_sz = req_attrs->max_resp_sz_cached;
>>>+
>>>+	if (req_attrs->max_ops < resp_attrs->max_ops)
>>>+		resp_attrs->max_ops = req_attrs->max_ops;
>>>+	
>>>+	if (req_attrs->max_reqs < resp_attrs->max_reqs)
>>>+		resp_attrs->max_reqs = req_attrs->max_reqs;
>>>+
>>>+	/* XXX: We ignore the stream channel attributes... we 
>>>      
>>>
>>have no idea what
>>    
>>
>>>+	 * to do with them anyways!
>>>+	 */
>>>+}
>>>+
>>> int _nfs4_proc_create_session(struct nfs4_client *clp, struct 
>>>nfs4_session *session, struct rpc_clnt *clnt)  {
>>> 	struct nfs41_create_session_args args = { @@ -3845,7 
>>>      
>>>
>>+4063,19 @@ int 
>>    
>>
>>>_nfs4_proc_create_session(struct nfs4_client *clp, struct 
>>>      
>>>
>>nfs4_session *sess
>>    
>>
>>> 	};
>>> 	int status;
>>> 
>>>-	status = rpc_call_sync(clnt, &msg, 0);
>>>+	nfs4_init_channel_attrs(clp, &args.fc_attrs, &args.bc_attrs);
>>>+	
>>>+        status = rpc_call_sync(clnt, &msg, 0);
>>>+
>>>+	/* Set the negotiated values in the session's 
>>>      
>>>
>>channel_attrs struct 
>>    
>>
>>>+*/
>>>+
>>>+	if (!status) {
>>>+		nfs4_adjust_channel_attrs(&args.fc_attrs, 
>>>+					
>>>      
>>>
>>&session->fore_channel.chan_attrs);
>>    
>>
>>>+		nfs4_adjust_channel_attrs(&args.bc_attrs, 
>>>+					
>>>      
>>>
>>&session->back_channel.chan_attrs);
>>    
>>
>>>+	}
>>>+	
>>> 	return status;
>>> }
>>> EXPORT_SYMBOL(_nfs4_proc_create_session);
>>>@@ -3853,29 +4083,28 @@ EXPORT_SYMBOL(_nfs4_proc_create_session);
>>> int nfs4_proc_create_session(struct nfs4_client *clp)  {
>>> 	int status;
>>>-	unsigned long now;
>>> 	struct nfs4_session *session;
>>>-	struct nfs_fsinfo fsinfo;
>>> 	u32 *ptr;
>>> 
>>>-	now = jiffies;
>>>-
>>> 	status = _nfs4_proc_create_session(clp, 
>>>      
>>>
>>clp->cl_session, clp->cl_rpcclient);
>>    
>>
>>> 	if (status)
>>> 		return status;
>>> 
>>>-	status = nfs4_proc_get_lease_time(clp, &fsinfo);
>>>-	if (status == 0) {
>>>-		/* Update lease time and schedule renewal */
>>>-		spin_lock(&clp->cl_lock);
>>>-		clp->cl_lease_time = fsinfo.lease_time * HZ;
>>>-		clp->cl_last_renewal = now;
>>>-		clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
>>>-		spin_unlock(&clp->cl_lock);
>>>+	session = clp->cl_session;
>>> 
>>>-		nfs4_schedule_state_renewal(clp);
>>>+	/* Init the fore channel */
>>>+	status = nfs4_init_slot_table(&session->fore_channel);
>>>+	dprintk("fc init returned %d\n", status);
>>>+	if (status)
>>>+		return status;
>>>+
>>>+	/* Init the back channel */
>>>+	status = nfs4_init_slot_table(&session->back_channel);
>>>+	dprintk("bc init returned %d\n", status);
>>>+	if (status) {
>>>+		nfs4_destroy_slot_table(&session->fore_channel);
>>>+		return status;
>>> 	}
>>>-	session = clp->cl_session;
>>> 
>>> 	ptr = (int *)session->sess_id;
>>> 	dprintk("sessionid is: %d:%d:%d:%d\n", ptr[0], ptr[1], ptr[2], 
>>>ptr[3]); @@ -4769,11 +4998,8 @@ struct nfs4_session 
>>>*nfs41_alloc_session(void)
>>> 
>>> 	session->expired = 1;
>>> 
>>>-	INIT_LIST_HEAD(&session->slots_in_use);
>>>-	INIT_LIST_HEAD(&session->unused_slots);
>>> 	INIT_LIST_HEAD(&session->session_hashtbl);
>>> 
>>>-	//rpc_init_wait_queue(&session->slot_waitq, "Slot waitqueue");
>>> 	spin_lock_init(&session->session_lock);
>>> 
>>> 	sema_init(&session->session_sem, 1); @@ -4797,6 +5023,8 @@ void 
>>>nfs4_get_session(struct nfs4_session *session)  void 
>>>nfs4_put_session(struct nfs4_session **session)  {
>>> 	if (atomic_dec_and_test(&((*session)->ref_count))) {
>>>+		nfs4_destroy_slot_table(&((*session)->fore_channel));
>>>+		nfs4_destroy_slot_table(&((*session)->back_channel));
>>> 		nfs41_free_session(*session);
>>> 		*session = NULL;
>>> 	}
>>>@@ -4806,7 +5034,11 @@ int nfs41_proc_setup_session(struct 
>>>      
>>>
>>nfs4_client 
>>    
>>
>>>*clp)  {
>>> 	int status;
>>> 
>>>-	dprintk("in %s!\n", __FUNCTION__);
>>>+        struct nfs_fsinfo fsinfo;
>>>+	unsigned long now;
>>>+	
>>>+	now = jiffies;
>>>+
>>> 	if (!clp->cl_session) {
>>> 		/* create the session struct to hold the 
>>>      
>>>
>>session parameters */
>>    
>>
>>> 		clp->cl_session = nfs41_alloc_session(); @@ 
>>>      
>>>
>>-4844,6 +5076,20 @@ int 
>>    
>>
>>>nfs41_proc_setup_session(struct nfs4_client *clp)
>>> 	clp->cl_session->expired = 0;
>>> 	clp->cl_session->client = clp;
>>> 
>>>+	status = nfs4_proc_get_lease_time(clp, &fsinfo);
>>>+
>>>+	if (status) 
>>>+		goto out_free;
>>>+	
>>>+	/* Update lease time and schedule renewal */
>>>+	spin_lock(&clp->cl_lock);
>>>+	clp->cl_lease_time = fsinfo.lease_time * HZ;
>>>+	clp->cl_last_renewal = now;
>>>+	clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
>>>+	spin_unlock(&clp->cl_lock);
>>>+
>>>+	nfs4_schedule_state_renewal(clp);
>>>+	
>>> 	clp->cl_session->mutating = 0;
>>> out:
>>> 	up(&clp->cl_session->session_sem);
>>>diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 
>>>5ef6d70..7da9a6a 100644
>>>--- a/fs/nfs/nfs4xdr.c
>>>+++ b/fs/nfs/nfs4xdr.c
>>>@@ -1429,20 +1429,22 @@ static int 
>>>      
>>>
>>encode_create_session(struct xdr_stream *xdr, struct nfs41_create_ses
>>    
>>
>>> 	RESERVE_SPACE(2*28);                    /* 2 channel_attrs */
>>> 
>>> 	/* Fore Channel */
>>>-	WRITE32(32768);                         /* max req size */
>>>-	WRITE32(32768);                         /* max resp size */
>>>-	WRITE32(32768);                         /* Max resp 
>>>      
>>>
>>size cached */
>>    
>>
>>>-	WRITE32(RPC_DEF_SLOT_TABLE);            /* max operations */
>>>-	WRITE32(RPC_DEF_SLOT_TABLE);            /* max requests */
>>>+	WRITE32(args->fc_attrs.max_rqst_sz);	/* max req size */
>>>+	WRITE32(args->fc_attrs.max_resp_sz);	/* max resp size */
>>>+	WRITE32(args->fc_attrs.max_resp_sz_cached);	/* Max 
>>>      
>>>
>>resp sz cached */
>>    
>>
>>>+	WRITE32(args->fc_attrs.max_ops);	/* max operations */
>>>+	WRITE32(args->fc_attrs.max_reqs);	/* max requests */
>>>+
>>> 	WRITE32(0);                             /* 
>>>      
>>>
>>Streamchannel attrs */
>>    
>>
>>> 	WRITE32(0);                             /*rdmachannel_attrs */
>>> 
>>> 	/* Back Channel */
>>>-	WRITE32(32768);                         /* max req size */
>>>-	WRITE32(32768);                         /* max resp size */
>>>-	WRITE32(32768);                         /* Max resp 
>>>      
>>>
>>size cached */
>>    
>>
>>>-	WRITE32(RPC_DEF_SLOT_TABLE);            /* max operations */
>>>-	WRITE32(RPC_DEF_SLOT_TABLE);            /* max requests */
>>>+	WRITE32(args->bc_attrs.max_rqst_sz);	/* max req size */
>>>+	WRITE32(args->bc_attrs.max_resp_sz);	/* max resp size */
>>>+	WRITE32(args->bc_attrs.max_resp_sz_cached);	/* Max 
>>>      
>>>
>>resp sz cached */
>>    
>>
>>>+	WRITE32(args->bc_attrs.max_ops);	/* max operations */
>>>+	WRITE32(args->bc_attrs.max_reqs);	/* max requests */
>>>+
>>> 	WRITE32(0);                             /* 
>>>      
>>>
>>Streamchannel attrs */
>>    
>>
>>> 	WRITE32(0);                             /*rdmachannel_attrs */
>>> 
>>>@@ -6404,7 +6406,7 @@ static int 
>>>      
>>>
>>nfs41_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p, str
>>    
>>
>>> 		status = decode_putrootfh(&xdr);
>>> 	if (!status)
>>> 		status = decode_fsinfo(&xdr, res->fsinfo);
>>>-	if (!status)
>>>+	if (status)
>>> 		status = -nfs_stat_to_errno(hdr.status);
>>> 	return status;
>>> }
>>>diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a3bc630..89eb564 
>>>100644
>>>--- a/fs/nfs/super.c
>>>+++ b/fs/nfs/super.c
>>>@@ -1260,15 +1260,13 @@ static int nfs4_fill_super(struct 
>>>      
>>>
>>super_block *sb, struct nfs4_mount_data *data,
>>    
>>
>>>                 server->nfs4_state->cl_minorversion = i;
>>> 
>>>                 if (server->rpc_ops->setup_session) {
>>>-                        int status;
>>>-
>>>                         lock_kernel();
>>>                         down_write(&server->nfs4_state->cl_sem);
>>>-                        status =  
>>>      
>>>
>>server->rpc_ops->setup_session(server->nfs4_state);
>>    
>>
>>>+                        err =  
>>>+ server->rpc_ops->setup_session(server->nfs4_state);
>>>                         up_write(&server->nfs4_state->cl_sem);
>>>                         unlock_kernel();
>>> 
>>>-                        if (status) {
>>>+                        if (err) {
>>>                                 printk(KERN_EMERG 
>>>      
>>>
>>"Couldn't mount using minorversion %d\n", i);
>>    
>>
>>>                                 
>>>      
>>>
>>rpc_shutdown_client(server->client);
>>    
>>
>>>                         }
>>>diff --git a/include/linux/nfs41_sessions.h 
>>>b/include/linux/nfs41_sessions.h new file mode 100644 index 
>>>0000000..02f45db
>>>--- /dev/null
>>>+++ b/include/linux/nfs41_sessions.h
>>>@@ -0,0 +1,72 @@
>>>+#ifndef __NFS4_1_SESSIONS_H__
>>>+#define __NFS4_1_SESSIONS_H__
>>>+
>>>+/* The flags for the nfs4_slot struct */
>>>+#define NFS4_SLOT_BUSY		0X0	/* Slot in use */
>>>+#define NFS4_SLOT_RECLAIMED	0x1	/* Slot has 
>>>      
>>>
>>been reclaimed by
>>    
>>
>>>+					   the server */
>>>+
>>>+typedef unsigned char	 	sessionid_t[16];
>>>+typedef u32			streamchannel_attrs;
>>>+typedef u32			rdmachannel_attrs;
>>>+
>>>+struct nfs4_channel_attrs {
>>>+	u32			max_rqst_sz;
>>>+	u32			max_resp_sz;
>>>+	u32			max_resp_sz_cached;
>>>+	u32			max_ops;
>>>+	u32			max_reqs;
>>>+	streamchannel_attrs	stream_attrs;
>>>+	rdmachannel_attrs	rdma_attrs;
>>>+};
>>>+
>>>+struct nfs4_slot {
>>>+	u32		 	slot_nr;
>>>+	u32		 	seq_nr;
>>>+	unsigned long		flags;
>>>+	u32	 		nr_waiters;
>>>+	spinlock_t		slot_lock;
>>>+};
>>>+
>>>+struct nfs4_slot_table {
>>>+	struct nfs4_slot	*slots;
>>>+	u32			max_slots;
>>>+	spinlock_t		slot_tbl_lock;
>>>+};
>>>+
>>>+struct nfs4_channel {
>>>+	struct nfs4_channel_attrs 	chan_attrs;
>>>+	struct rpc_clnt 		*rpc_client;
>>>+	struct nfs4_slot_table		slot_table;
>>>+};
>>>+
>>>+struct nfs4_session {
>>>+	/* Session related params */
>>>+	sessionid_t			sess_id;
>>>+	u32				seqid;	/* The seqid 
>>>      
>>>
>>returned by 
>>    
>>
>>>+						   exchange_id */
>>>+	u32				persist;
>>>+	u32				header_padding;
>>>+	u32				hash_alg;
>>>+	u32				ssv_len;
>>>+	u32				use_for_back_chan;
>>>+	u32				rdma_mode;
>>>+
>>>+	/* The fore and back channel */
>>>+	struct nfs4_channel		fore_channel;
>>>+	struct nfs4_channel		back_channel;
>>>+
>>>+	unsigned int			expired;
>>>+	struct nfs4_client *		client;
>>>+	struct list_head		session_hashtbl;
>>>+	spinlock_t 			session_lock;
>>>+	/* To prevent races between create_session and sequence */
>>>+	int 				mutating;
>>>+	struct semaphore		session_sem;
>>>+	atomic_t			ref_count;
>>>+};
>>>+
>>>+
>>>+#endif
>>>+
>>>+
>>>diff --git a/include/linux/nfs_xdr.h 
>>>      
>>>
>>b/include/linux/nfs_xdr.h index 
>>    
>>
>>>4648707..52b9e38 100644
>>>--- a/include/linux/nfs_xdr.h
>>>+++ b/include/linux/nfs_xdr.h
>>>@@ -3,6 +3,7 @@
>>> 
>>> #include <linux/sunrpc/xprt.h>
>>> #include <linux/nfsacl.h>
>>>+#include <linux/nfs41_sessions.h>
>>> 
>>> /*
>>>  * To change the maximum rsize and wsize supported by the 
>>>      
>>>
>>NFS client, 
>>    
>>
>>>adjust @@ -778,6 +779,8 @@ struct nfs41_create_session_args {
>>> 	uint32_t			use_for_backchannel;
>>> 	uint32_t			use_for_rdma;
>>> 	uint32_t			cb_program;
>>>+	struct nfs4_channel_attrs	fc_attrs;	/* Fore 
>>>      
>>>
>>Channel */
>>    
>>
>>>+	struct nfs4_channel_attrs	bc_attrs;	/* Back 
>>>      
>>>
>>Channel */
>>    
>>
>>> };
>>> 
>>> struct nfs41_create_session_res {
>>>@@ -805,6 +808,7 @@ struct nfs41_sequence_res {
>>> 	u32				target_maxslots;
>>> 	u32				status_flags;
>>> 	struct nfs4_state_owner		*sp;
>>>+	struct nfs4_slot		*slot;
>>> };
>>> 
>>> struct nfs4_get_lease_time_args {
>>>diff --git a/include/linux/nfsd/state.h 
>>>      
>>>
>>b/include/linux/nfsd/state.h 
>>    
>>
>>>index 8f699b4..fde667e 100644
>>>--- a/include/linux/nfsd/state.h
>>>+++ b/include/linux/nfsd/state.h
>>>@@ -47,8 +47,6 @@ typedef struct {
>>> 	u32             cl_id;
>>> } clientid_t;
>>> 
>>>-typedef unsigned char sessionid_t[16];
>>>-
>>> typedef struct {
>>> 	u32             so_boot;
>>> 	u32             so_stateownerid;
>>>      
>>>



More information about the pNFS mailing list