[pnfs] [PATCH 1/2] Slot Table Implementation

Iyer, Rahul Rahul.Iyer at netapp.com
Fri Mar 16 21:03:17 EDT 2007


Hi Benny,
A comment inline...
Regards
Rahul


> -----Original Message-----
> From: Benny Halevy [mailto:bhalevy at panasas.com] 
> Sent: Friday, March 16, 2007 2:24 AM
> To: Iyer, Rahul
> Cc: pnfs at linux-nfs.org
> Subject: Re: [pnfs] [PATCH 1/2] Slot Table Implementation
> 
> Rahul, my comments below...
> 
> Benny
> 
> iyer at netapp.com wrote:
> > From: iyer <iyer at netapp.com>
> > 
> > Added code to implement a slot table.
> > 
> > Signed-off-by: iyer <iyer at netapp.com>
> > ---
> >  fs/nfs/nfs41_sessions.h        |   62 --------
> >  fs/nfs/nfs4_fs.h               |    2 +-
> >  fs/nfs/nfs4proc.c              |  326 
> +++++++++++++++++++++++++++++++++++-----
> >  fs/nfs/nfs4xdr.c               |   24 ++--
> >  fs/nfs/super.c                 |    6 +-
> >  include/linux/nfs41_sessions.h |   72 +++++++++
> >  include/linux/nfs_xdr.h        |    4 +
> >  include/linux/nfsd/state.h     |    2 -
> >  8 files changed, 378 insertions(+), 120 deletions(-)  delete mode 
> > 100644 fs/nfs/nfs41_sessions.h  create mode 100644 
> > include/linux/nfs41_sessions.h
> > 
> > diff --git a/fs/nfs/nfs41_sessions.h 
> b/fs/nfs/nfs41_sessions.h deleted 
> > file mode 100644 index fc658c5..0000000
> > --- a/fs/nfs/nfs41_sessions.h
> > +++ /dev/null
> > @@ -1,62 +0,0 @@
> > -#ifndef __NFS4_1_SESSIONS_H__
> > -#define __NFS4_1_SESSIONS_H__
> > -
> > -typedef unsigned char		sessionid_t[16];
> > -typedef u32			streamchannel_attrs;
> > -typedef u32			rdmachannel_attrs;
> > -
> > -struct nfs4_channel_attrs {
> > -	unsigned long		max_rqst_sz;
> > -	unsigned long		max_resp_sz;
> > -	unsigned long		max_resp_sz_cached;
> > -	unsigned long		max_ops;
> > -	unsigned long		max_reqs;
> > -	streamchannel_attrs	stream_attrs;
> > -	rdmachannel_attrs	rdma_attrs;
> > -};
> > -
> > -struct nfs4_channel {
> > -	struct nfs4_channel_attrs	chan_attrs;
> > -	unsigned long			nr_conns;
> > -	struct list_head		rpc_clients;
> > -};
> > -
> > -struct nfs4_session {
> > -	/* Session related params */
> > -	sessionid_t		sess_id;
> > -	u32			seqid;  /* The seqid returned 
> by exchange_id */
> > -	u32			persist;
> > -	u32			header_padding;
> > -	u32			hash_alg;
> > -	u32			ssv_len;
> > -	u32			use_for_back_chan;
> > -	u32			rdma_mode;
> > -
> > -	/* Slotid management */
> > -	unsigned long		nr_slots_in_use;
> > -	struct list_head	slots_in_use;
> > -	struct list_head	unused_slots;
> > -	struct rpc_wait_queue	slot_waitq;
> > -
> > -	/* The fore and back channel */
> > -	struct nfs4_channel	fore_channel;
> > -	struct nfs4_channel	back_channel;
> > -
> > -	unsigned int		expired;
> > -	struct nfs4_client *	client;
> > -	struct list_head	session_hashtbl;
> > -	spinlock_t		session_lock;
> > -	/* To prevent races between create_session and sequence */
> > -	int			mutating;
> > -	struct semaphore	session_sem;
> > -	atomic_t	ref_count;
> > -};
> > -
> > -struct nfs4_slot {
> > -	u32			slot_nr;
> > -	u32			seq_nr;
> > -	struct nfs4_session *	session;
> > -	struct list_head	slot_list;
> > -};
> > -
> > -#endif
> > diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 
> > 195d757..e50657e 100644
> > --- a/fs/nfs/nfs4_fs.h
> > +++ b/fs/nfs/nfs4_fs.h
> > @@ -9,7 +9,7 @@
> >  #ifndef __LINUX_FS_NFS_NFS4_FS_H
> >  #define __LINUX_FS_NFS_NFS4_FS_H
> >  
> > -#include "nfs41_sessions.h"
> > +#include <linux/nfs41_sessions.h>
> >  
> >  #ifdef CONFIG_NFS_V4
> >  #define NFSV4_MAX_MINORVERSION 1
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 
> > 167ff4a..28513a1 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -49,6 +49,7 @@
> >  #include <linux/namei.h>
> >  #include <linux/mount.h>
> >  #include <linux/module.h>
> > +#include <linux/bitops.h>
> >  
> >  #include "nfs4_fs.h"
> >  #include "delegation.h"
> > @@ -221,8 +222,10 @@ static int 
> nfs41_proc_sequence_done(struct nfs4_session *session, struct nfs41_s
> >  	unsigned long timestamp;
> >  	struct nfs4_client *clp;
> >  
> > -	if (!session || !(clp = session->client))
> > -		return 0;
> > +	if (!session || !(clp = session->client)) {
> > +		printk(KERN_EMERG "%s is NULL!!!\n", 
> (!session)?"session":"clp");
> > +		BUG();
> > +	}
> 
> For performance reasons the defensive code should only be 
> active with kernel debugging.  The conditional statement 
> above is always executed needlessly and is not as clean as it 
> could be because of the assignment embedded in the boolean 
> expression.  Therefore, this better be coded like this:
> 
> 	BUG_ON(!session);
> 	clp = session->client;
> 	BUG_ON(!clp);
> 
> >  
> >  	if (!status) {
> >  		timestamp = jiffies;
> > @@ -233,17 +236,131 @@ static int 
> nfs41_proc_sequence_done(struct nfs4_session *session, struct nfs41_s
> >  		spin_unlock(&clp->cl_lock);
> >  	}
> >  
> > +	/* Clear the 'busy' bit on the slot that was used */
> > +	smp_mb__before_clear_bit();
> > +	clear_bit(NFS4_SLOT_BUSY, &res->slot->flags);
> > +	smp_mb__after_clear_bit();
> > +
> > +	printk(KERN_EMERG "waking up waiters on slot %d\n", 
> > +res->slot->slot_nr);
> > +
> > +	/* Wake up the threads waiting on this slot */
> > +	wake_up_bit(&res->slot->flags, NFS4_SLOT_BUSY);
> > +
> >  	return status;
> >  }
> >  
> > +static int nfs4_wait_bit_interruptible(void *word) {
> > +	if (signal_pending(current))
> > +		return -ERESTARTSYS;
> > +	schedule();
> > +	return 0;
> > +}
> > +
> > +/* Find the lowest numbered slot or sleep on the least 
> loaded slot */ 
> > +struct nfs4_slot *nfs4_find_slot(struct nfs4_channel *channel) {
> > +	struct nfs4_slot_table *tbl;
> > +	struct nfs4_slot *slot;
> > +	struct nfs4_slot *target_slot;
> > +	u32 max_slots;
> > +	u32 min_waiters;
> > +	int i;
> > +	int need_to_sleep;
> > +	
> > +	might_sleep();
> > +
> > +	tbl = &channel->slot_table;
> > +	min_waiters = tbl->slots[0].nr_waiters;
> 
> atomic_read()
> 
> > +	target_slot = &tbl->slots[0];
> > +
> > +	spin_lock(&tbl->slot_tbl_lock);
> > +	/* Make a local copy of max slots so we don't need to 
> hold it through
> > +	 * out. 
> > +	 * XXX Will need to revalidate this if slots are reclaimed
> > +	 */
> > +	max_slots = tbl->max_slots;
> > +	spin_unlock(&tbl->slot_tbl_lock);
> 
> Using a spinlock here is unreasonably expensive when you 
> could use atomic counters since the lock overhead is not 
> amortized over multiple accesses to the structure.
> max_slots and nr_waiters should be defined as atomic_t and 
> atomic operations be used to access them; Alternatively use 
> one spinlock for the whole channel around the outer loop
> 
> > +
> > +	do {
> > +		need_to_sleep = 1;
> > +		for (i = 0; i <  max_slots; ++i) {
> > +			slot = &tbl->slots[i];
> > +			if (!test_and_set_bit(NFS4_SLOT_BUSY, 
> &slot->flags)){
> > +				/* We found an empty slot */
> > +				target_slot = slot;
> > +				need_to_sleep = 0;
> > +				break;
> > +			}
> > +
> > +			spin_lock(&slot->slot_lock);
> > +			if (min_waiters > slot->nr_waiters) {
> > +				min_waiters = slot->nr_waiters;
> > +				target_slot = slot;
> > +			}
> > +			spin_unlock(&slot->slot_lock);
> 
> why lock? use atomic_read(&slot->nr_waiters) also, if several 
> threads go through this code in parallel they will pick the 
> same target_slot so you want to increment.
> That's another reason to use a bigger lock.

Using atomic_t is a bit hard when you have to do compare and set kind of
operations...
Like for instance:

If (min_waiters > slot->nr_waiters) {
	min_waiters = slot->nr_waiters;
	target_slot = slot;
}

Doing the above atomically is hard with atomic variables only because
the whole sequence needs to be atomic. However, I can change the
spin_lock used to rwlock_t. This should better solve the problem. Right?

> 
> > +			printk(KERN_EMERG "slot %d has busy bit 
> %s and nr_waiters is %u\n"
> > +					, slot->slot_nr, 
> > +					(need_to_sleep)?"set":"unset", 
> > +					slot->nr_waiters);
> > +		}
> > +	
> > +		/* Check whether we need to sleep. If so, sleep 
> on the BUSY bit 
> > +		 * Increment the nr_waiters before sleeping and 
> decrement it 
> > +		 * when woken up
> > +		 */
> > +		if (need_to_sleep) {
> > +			printk(KERN_EMERG "sleeping on slot %d; 
> seq_nr: %d\n",
> > +					target_slot->slot_nr, 
> > +					target_slot->seq_nr);
> > +			spin_lock(&target_slot->slot_lock);
> > +			++target_slot->nr_waiters;
> > +			spin_unlock(&target_slot->slot_lock);
> 
> atomic_inc()
> 
> > +	
> > +			/* XXX: We need to check the return value of
> > +			 * eait_on_bit so that we can check if it's
> > +			 * interrupted.
> > +			 */
> > +			wait_on_bit(&target_slot->flags, 
> NFS4_SLOT_BUSY, 
> > +					nfs4_wait_bit_interruptible,
> > +					TASK_INTERRUPTIBLE);
> > +
> > +			spin_lock(&target_slot->slot_lock);
> > +			--target_slot->nr_waiters;
> > +			spin_unlock(&target_slot->slot_lock);
> 
> atomic_dec()
> 
> But wait a second, why do you want a queue per slot to begin with?
> It seems like you can just go with a semaphore initialized to 
> the number of slots, do down_interruptible before scanning 
> for a free slot and up in nfs41_proc_sequence_done. Take a 
> spin lock to scan the slot table (last index can be saved and 
> rotated for uniformity) and if not free slot is found do a 
> BUG() since the semaphore should prevent that from happening;
> 
> 
> > +		}
> > +		else
> > +			break;
> > +	}while (test_and_set_bit(NFS4_SLOT_BUSY, &target_slot->flags));
> > +
> > +	printk(KERN_EMERG "slot id: %u\nseqid: %u\n max_slots: %u\n", 
> > +			target_slot->slot_nr, target_slot->seq_nr, 
> > +				max_slots);
> > +	
> > +	return target_slot;
> > +}
> > +	
> >  static int _nfs41_proc_setup_sequence(struct nfs4_session 
> *session, 
> > struct nfs41_sequence_args *args, struct nfs41_sequence_res *res)  {
> >  	u32 *ptr;
> > -
> > +	struct nfs4_slot *slot;
> > +	
> >  	ptr = (u32 *)session->sess_id;
> >  	dprintk("%s: %u:%u:%u:%u\n", __FUNCTION__, ptr[0], 
> ptr[1], ptr[2], 
> > ptr[3]);
> >  
> > -	memcpy(args->sessionid, (unsigned char 
> *)session->sess_id, NFS4_MAX_SESSIONID_LEN);
> > +	memcpy(args->sessionid, (unsigned char *)session->sess_id, 
> > +					NFS4_MAX_SESSIONID_LEN);
> > +
> > +       slot = nfs4_find_slot(&session->fore_channel);
> > +
> > +       /* XXX: Do we always increment this? Are there any 
> errors for which we
> > +	* don't increment the sequence number?
> > +	*/
> > +       args->seqid = slot->seq_nr++;
> > +       args->slotid = slot->slot_nr;
> > +       args->maxslots = session->fore_channel.slot_table.max_slots;
> > +      
> > +       res->slot = slot;
> >  
> >  	return 0;
> >  }
> > @@ -1685,11 +1802,6 @@ static int nfs4_proc_get_root(struct 
> nfs_server *server, struct nfs_fh *fhandle,
> >  	};
> >  	int status;
> >  
> > -	if (server->rpc_ops->setup_sequence && (status =
> > -	    
> server->rpc_ops->setup_sequence(server->nfs4_state->cl_session,
> > -	    &seqargs, &seqres)))
> > -		return status;
> > -
> >  	/*
> >  	 * Now we do a separate LOOKUP for each component of 
> the mount path.
> >  	 * The LOOKUPs are done separately so that we can 
> conveniently @@ 
> > -1714,9 +1826,21 @@ static int nfs4_proc_get_root(struct nfs_server 
> > *server, struct nfs_fh *fhandle,
> >  
> >  		do {
> >  			nfs_fattr_init(fattr);
> > +        		if (server->rpc_ops->setup_sequence && 
> (status = 
> > +				server->rpc_ops->setup_sequence(
> > +				server->nfs4_state->cl_session, 
> > +				&seqargs, &seqres)))
> > +	                        return status;
> > +			
> >  			status = nfs4_handle_exception(server,
> >  					
> rpc_call_sync(server->client, &msg, 0),
> >  					&exception);
> > +
> > +        		if (server->rpc_ops->sequence_done)
> > +		                server->rpc_ops->sequence_done(
> > +						
> server->nfs4_state->cl_session, 
> > +						&seqres, status);
> > +
> >  		} while (exception.retry);
> >  		if (status == 0)
> >  			continue;
> > @@ -1731,10 +1855,6 @@ static int nfs4_proc_get_root(struct 
> nfs_server *server, struct nfs_fh *fhandle,
> >  	if (status == 0)
> >  		status = nfs4_do_fsinfo(server, fhandle, info);
> >  out:
> > -	if (server->rpc_ops->sequence_done)
> > -		
> server->rpc_ops->sequence_done(server->nfs4_state->cl_session,
> > -						&seqres, status);
> > -
> >  	return nfs4_map_errors(status);
> >  }
> >  
> > @@ -3268,6 +3388,7 @@ int nfs4_proc_async_sequence(struct 
> nfs4_client *clp, struct rpc_cred *cred)
> >  	ret = rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
> >  	&nfs4_sequence_ops, (void *)jiffies);
> >  
> > +        nfs41_proc_sequence_done(clp->cl_session, res, ret);
> >  out:
> >  	return ret;
> >  out_free:
> > @@ -3581,14 +3702,6 @@ nfs4_async_handle_error(struct 
> rpc_task *task, const struct nfs_server *server)
> >  	return 0;
> >  }
> >  
> > -static int nfs4_wait_bit_interruptible(void *word) -{
> > -	if (signal_pending(current))
> > -		return -ERESTARTSYS;
> > -	schedule();
> > -	return 0;
> > -}
> > -
> >  static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct 
> > nfs4_client *clp)  {
> >  	sigset_t oldset;
> > @@ -3823,6 +3936,111 @@ int nfs4_proc_get_lease_time(struct 
> nfs4_client *clp, struct nfs_fsinfo *fsinfo)
> >  	return status;
> >  }
> >  
> > +/* Initialize a slot table */
> > +int nfs4_init_slot_table(struct nfs4_channel *channel) {
> > +	int i;
> > +	struct nfs4_slot_table *tbl;
> > +	struct nfs4_slot *slot;
> > +	
> > +	tbl = &channel->slot_table;
> > +	tbl->max_slots = channel->chan_attrs.max_reqs;
> > +
> > +	tbl->slots = kzalloc(tbl->max_slots * sizeof(struct 
> nfs4_slot), GFP_ATOMIC);
> > +	if (!tbl->slots)
> > +		return -ENOMEM;
> > +
> > +	spin_lock_init(&tbl->slot_tbl_lock);
> > +
> > +	for (i = 0; i < tbl->max_slots; ++i) {
> > +		slot = &tbl->slots[i];
> > +
> > +		slot->slot_nr = i;
> > +		slot->seq_nr = 1;
> > +		slot->flags = 0;
> > +		slot->nr_waiters = 0;
> > +		spin_lock_init(&slot->slot_lock);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +/* Destroy the slot table */
> > +void nfs4_destroy_slot_table(struct nfs4_channel *channel) {
> > +	int i;
> > +	struct nfs4_slot *slot;
> > +	struct nfs4_slot_table *tbl;
> > +
> > +	tbl = &channel->slot_table;
> > +	
> > +	for (i = 0; i < tbl->max_slots;++i) {
> > +		slot = &tbl->slots[i];
> > +
> > +		if (slot->nr_waiters)
> > +			BUG();
> > +	}
> > +
> > +	kfree(channel->slot_table.slots);
> > +	channel->slot_table.slots = NULL;
> > +
> > +	return;
> > +}
> > +
> > +/* dump the channel attributes */
> > +void nfs4_dump_channel_attrs(struct nfs4_channel_attrs *attrs) {
> > +	printk(KERN_INFO "max_rqst_sz: %u\n", attrs->max_rqst_sz);
> > +	printk(KERN_INFO "max_resp_sz: %u\n", attrs->max_resp_sz);
> > +	printk(KERN_INFO "max_resp_sz_cached: %u\n", 
> attrs->max_resp_sz_cached);
> > +	printk(KERN_INFO "max_ops: %u\n", attrs->max_ops);
> > +	printk(KERN_INFO "max_reqs: %u\n", attrs->max_reqs); }
> > +
> > +/* Initialize the values to be used by the client in 
> CREATE_SESSION 
> > +*/ void nfs4_init_channel_attrs(struct nfs4_client *clp,
> > +				struct nfs4_channel_attrs *fc_attrs,
> > +				struct nfs4_channel_attrs *bc_attrs) {
> > +	/* XXX: We need to have good values here... 32K is a 
> wild guess */
> > +	fc_attrs->max_rqst_sz = bc_attrs->max_rqst_sz = 32768;
> > +	fc_attrs->max_resp_sz = bc_attrs->max_resp_sz = 32768;
> > +	fc_attrs->max_resp_sz_cached = 
> bc_attrs->max_resp_sz_cached = 32768;
> > +	fc_attrs->max_ops = bc_attrs->max_ops = 0xFFFFFFFF;
> > +	fc_attrs->max_reqs = bc_attrs->max_reqs = 
> > +				clp->cl_rpcclient->cl_xprt->max_reqs;
> > +	fc_attrs->stream_attrs = bc_attrs->stream_attrs = 0;
> > +	fc_attrs->rdma_attrs = bc_attrs->rdma_attrs = 0;
> > +
> > +}
> > +
> > +/* Check the values returned by the server for 
> CREATE_SESSION. Since 
> > +we made
> > + * our needs known, if the server gives us more than we need, we 
> > +don't bother
> > + * with it.
> > + */
> > +void nfs4_adjust_channel_attrs(struct nfs4_channel_attrs 
> *req_attrs,
> > +				struct nfs4_channel_attrs *resp_attrs) {
> > +	if (req_attrs->max_rqst_sz < resp_attrs->max_rqst_sz)
> > +		resp_attrs->max_rqst_sz = req_attrs->max_rqst_sz;
> > +
> > +	if (req_attrs->max_resp_sz < resp_attrs->max_resp_sz)
> > +		resp_attrs->max_resp_sz = req_attrs->max_resp_sz;
> > +
> > +	if (req_attrs->max_resp_sz_cached < resp_attrs->max_resp_sz)
> > +		resp_attrs->max_resp_sz = req_attrs->max_resp_sz_cached;
> > +
> > +	if (req_attrs->max_ops < resp_attrs->max_ops)
> > +		resp_attrs->max_ops = req_attrs->max_ops;
> > +	
> > +	if (req_attrs->max_reqs < resp_attrs->max_reqs)
> > +		resp_attrs->max_reqs = req_attrs->max_reqs;
> > +
> > +	/* XXX: We ignore the stream channel attributes... we 
> have no idea what
> > +	 * to do with them anyways!
> > +	 */
> > +}
> > +
> >  int _nfs4_proc_create_session(struct nfs4_client *clp, struct 
> > nfs4_session *session, struct rpc_clnt *clnt)  {
> >  	struct nfs41_create_session_args args = { @@ -3845,7 
> +4063,19 @@ int 
> > _nfs4_proc_create_session(struct nfs4_client *clp, struct 
> nfs4_session *sess
> >  	};
> >  	int status;
> >  
> > -	status = rpc_call_sync(clnt, &msg, 0);
> > +	nfs4_init_channel_attrs(clp, &args.fc_attrs, &args.bc_attrs);
> > +	
> > +        status = rpc_call_sync(clnt, &msg, 0);
> > +
> > +	/* Set the negotiated values in the session's 
> channel_attrs struct 
> > +*/
> > +
> > +	if (!status) {
> > +		nfs4_adjust_channel_attrs(&args.fc_attrs, 
> > +					
> &session->fore_channel.chan_attrs);
> > +		nfs4_adjust_channel_attrs(&args.bc_attrs, 
> > +					
> &session->back_channel.chan_attrs);
> > +	}
> > +	
> >  	return status;
> >  }
> >  EXPORT_SYMBOL(_nfs4_proc_create_session);
> > @@ -3853,29 +4083,28 @@ EXPORT_SYMBOL(_nfs4_proc_create_session);
> >  int nfs4_proc_create_session(struct nfs4_client *clp)  {
> >  	int status;
> > -	unsigned long now;
> >  	struct nfs4_session *session;
> > -	struct nfs_fsinfo fsinfo;
> >  	u32 *ptr;
> >  
> > -	now = jiffies;
> > -
> >  	status = _nfs4_proc_create_session(clp, 
> clp->cl_session, clp->cl_rpcclient);
> >  	if (status)
> >  		return status;
> >  
> > -	status = nfs4_proc_get_lease_time(clp, &fsinfo);
> > -	if (status == 0) {
> > -		/* Update lease time and schedule renewal */
> > -		spin_lock(&clp->cl_lock);
> > -		clp->cl_lease_time = fsinfo.lease_time * HZ;
> > -		clp->cl_last_renewal = now;
> > -		clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
> > -		spin_unlock(&clp->cl_lock);
> > +	session = clp->cl_session;
> >  
> > -		nfs4_schedule_state_renewal(clp);
> > +	/* Init the fore channel */
> > +	status = nfs4_init_slot_table(&session->fore_channel);
> > +	dprintk("fc init returned %d\n", status);
> > +	if (status)
> > +		return status;
> > +
> > +	/* Init the back channel */
> > +	status = nfs4_init_slot_table(&session->back_channel);
> > +	dprintk("bc init returned %d\n", status);
> > +	if (status) {
> > +		nfs4_destroy_slot_table(&session->fore_channel);
> > +		return status;
> >  	}
> > -	session = clp->cl_session;
> >  
> >  	ptr = (int *)session->sess_id;
> >  	dprintk("sessionid is: %d:%d:%d:%d\n", ptr[0], ptr[1], ptr[2], 
> > ptr[3]); @@ -4769,11 +4998,8 @@ struct nfs4_session 
> > *nfs41_alloc_session(void)
> >  
> >  	session->expired = 1;
> >  
> > -	INIT_LIST_HEAD(&session->slots_in_use);
> > -	INIT_LIST_HEAD(&session->unused_slots);
> >  	INIT_LIST_HEAD(&session->session_hashtbl);
> >  
> > -	//rpc_init_wait_queue(&session->slot_waitq, "Slot waitqueue");
> >  	spin_lock_init(&session->session_lock);
> >  
> >  	sema_init(&session->session_sem, 1); @@ -4797,6 +5023,8 @@ void 
> > nfs4_get_session(struct nfs4_session *session)  void 
> > nfs4_put_session(struct nfs4_session **session)  {
> >  	if (atomic_dec_and_test(&((*session)->ref_count))) {
> > +		nfs4_destroy_slot_table(&((*session)->fore_channel));
> > +		nfs4_destroy_slot_table(&((*session)->back_channel));
> >  		nfs41_free_session(*session);
> >  		*session = NULL;
> >  	}
> > @@ -4806,7 +5034,11 @@ int nfs41_proc_setup_session(struct 
> nfs4_client 
> > *clp)  {
> >  	int status;
> >  
> > -	dprintk("in %s!\n", __FUNCTION__);
> > +        struct nfs_fsinfo fsinfo;
> > +	unsigned long now;
> > +	
> > +	now = jiffies;
> > +
> >  	if (!clp->cl_session) {
> >  		/* create the session struct to hold the 
> session parameters */
> >  		clp->cl_session = nfs41_alloc_session(); @@ 
> -4844,6 +5076,20 @@ int 
> > nfs41_proc_setup_session(struct nfs4_client *clp)
> >  	clp->cl_session->expired = 0;
> >  	clp->cl_session->client = clp;
> >  
> > +	status = nfs4_proc_get_lease_time(clp, &fsinfo);
> > +
> > +	if (status) 
> > +		goto out_free;
> > +	
> > +	/* Update lease time and schedule renewal */
> > +	spin_lock(&clp->cl_lock);
> > +	clp->cl_lease_time = fsinfo.lease_time * HZ;
> > +	clp->cl_last_renewal = now;
> > +	clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
> > +	spin_unlock(&clp->cl_lock);
> > +
> > +	nfs4_schedule_state_renewal(clp);
> > +	
> >  	clp->cl_session->mutating = 0;
> >  out:
> >  	up(&clp->cl_session->session_sem);
> > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 
> > 5ef6d70..7da9a6a 100644
> > --- a/fs/nfs/nfs4xdr.c
> > +++ b/fs/nfs/nfs4xdr.c
> > @@ -1429,20 +1429,22 @@ static int 
> encode_create_session(struct xdr_stream *xdr, struct nfs41_create_ses
> >  	RESERVE_SPACE(2*28);                    /* 2 channel_attrs */
> >  
> >  	/* Fore Channel */
> > -	WRITE32(32768);                         /* max req size */
> > -	WRITE32(32768);                         /* max resp size */
> > -	WRITE32(32768);                         /* Max resp 
> size cached */
> > -	WRITE32(RPC_DEF_SLOT_TABLE);            /* max operations */
> > -	WRITE32(RPC_DEF_SLOT_TABLE);            /* max requests */
> > +	WRITE32(args->fc_attrs.max_rqst_sz);	/* max req size */
> > +	WRITE32(args->fc_attrs.max_resp_sz);	/* max resp size */
> > +	WRITE32(args->fc_attrs.max_resp_sz_cached);	/* Max 
> resp sz cached */
> > +	WRITE32(args->fc_attrs.max_ops);	/* max operations */
> > +	WRITE32(args->fc_attrs.max_reqs);	/* max requests */
> > +
> >  	WRITE32(0);                             /* 
> Streamchannel attrs */
> >  	WRITE32(0);                             /*rdmachannel_attrs */
> >  
> >  	/* Back Channel */
> > -	WRITE32(32768);                         /* max req size */
> > -	WRITE32(32768);                         /* max resp size */
> > -	WRITE32(32768);                         /* Max resp 
> size cached */
> > -	WRITE32(RPC_DEF_SLOT_TABLE);            /* max operations */
> > -	WRITE32(RPC_DEF_SLOT_TABLE);            /* max requests */
> > +	WRITE32(args->bc_attrs.max_rqst_sz);	/* max req size */
> > +	WRITE32(args->bc_attrs.max_resp_sz);	/* max resp size */
> > +	WRITE32(args->bc_attrs.max_resp_sz_cached);	/* Max 
> resp sz cached */
> > +	WRITE32(args->bc_attrs.max_ops);	/* max operations */
> > +	WRITE32(args->bc_attrs.max_reqs);	/* max requests */
> > +
> >  	WRITE32(0);                             /* 
> Streamchannel attrs */
> >  	WRITE32(0);                             /*rdmachannel_attrs */
> >  
> > @@ -6404,7 +6406,7 @@ static int 
> nfs41_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p, str
> >  		status = decode_putrootfh(&xdr);
> >  	if (!status)
> >  		status = decode_fsinfo(&xdr, res->fsinfo);
> > -	if (!status)
> > +	if (status)
> >  		status = -nfs_stat_to_errno(hdr.status);
> >  	return status;
> >  }
> > diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a3bc630..89eb564 
> > 100644
> > --- a/fs/nfs/super.c
> > +++ b/fs/nfs/super.c
> > @@ -1260,15 +1260,13 @@ static int nfs4_fill_super(struct 
> super_block *sb, struct nfs4_mount_data *data,
> >                  server->nfs4_state->cl_minorversion = i;
> >  
> >                  if (server->rpc_ops->setup_session) {
> > -                        int status;
> > -
> >                          lock_kernel();
> >                          down_write(&server->nfs4_state->cl_sem);
> > -                        status =  
> server->rpc_ops->setup_session(server->nfs4_state);
> > +                        err =  
> > + server->rpc_ops->setup_session(server->nfs4_state);
> >                          up_write(&server->nfs4_state->cl_sem);
> >                          unlock_kernel();
> >  
> > -                        if (status) {
> > +                        if (err) {
> >                                  printk(KERN_EMERG 
> "Couldn't mount using minorversion %d\n", i);
> >                                  
> rpc_shutdown_client(server->client);
> >                          }
> > diff --git a/include/linux/nfs41_sessions.h 
> > b/include/linux/nfs41_sessions.h new file mode 100644 index 
> > 0000000..02f45db
> > --- /dev/null
> > +++ b/include/linux/nfs41_sessions.h
> > @@ -0,0 +1,72 @@
> > +#ifndef __NFS4_1_SESSIONS_H__
> > +#define __NFS4_1_SESSIONS_H__
> > +
> > +/* The flags for the nfs4_slot struct */
> > +#define NFS4_SLOT_BUSY		0X0	/* Slot in use */
> > +#define NFS4_SLOT_RECLAIMED	0x1	/* Slot has 
> been reclaimed by
> > +					   the server */
> > +
> > +typedef unsigned char	 	sessionid_t[16];
> > +typedef u32			streamchannel_attrs;
> > +typedef u32			rdmachannel_attrs;
> > +
> > +struct nfs4_channel_attrs {
> > +	u32			max_rqst_sz;
> > +	u32			max_resp_sz;
> > +	u32			max_resp_sz_cached;
> > +	u32			max_ops;
> > +	u32			max_reqs;
> > +	streamchannel_attrs	stream_attrs;
> > +	rdmachannel_attrs	rdma_attrs;
> > +};
> > +
> > +struct nfs4_slot {
> > +	u32		 	slot_nr;
> > +	u32		 	seq_nr;
> > +	unsigned long		flags;
> > +	u32	 		nr_waiters;
> > +	spinlock_t		slot_lock;
> > +};
> > +
> > +struct nfs4_slot_table {
> > +	struct nfs4_slot	*slots;
> > +	u32			max_slots;
> > +	spinlock_t		slot_tbl_lock;
> > +};
> > +
> > +struct nfs4_channel {
> > +	struct nfs4_channel_attrs 	chan_attrs;
> > +	struct rpc_clnt 		*rpc_client;
> > +	struct nfs4_slot_table		slot_table;
> > +};
> > +
> > +struct nfs4_session {
> > +	/* Session related params */
> > +	sessionid_t			sess_id;
> > +	u32				seqid;	/* The seqid 
> returned by 
> > +						   exchange_id */
> > +	u32				persist;
> > +	u32				header_padding;
> > +	u32				hash_alg;
> > +	u32				ssv_len;
> > +	u32				use_for_back_chan;
> > +	u32				rdma_mode;
> > +
> > +	/* The fore and back channel */
> > +	struct nfs4_channel		fore_channel;
> > +	struct nfs4_channel		back_channel;
> > +
> > +	unsigned int			expired;
> > +	struct nfs4_client *		client;
> > +	struct list_head		session_hashtbl;
> > +	spinlock_t 			session_lock;
> > +	/* To prevent races between create_session and sequence */
> > +	int 				mutating;
> > +	struct semaphore		session_sem;
> > +	atomic_t			ref_count;
> > +};
> > +
> > +
> > +#endif
> > +
> > +
> > diff --git a/include/linux/nfs_xdr.h 
> b/include/linux/nfs_xdr.h index 
> > 4648707..52b9e38 100644
> > --- a/include/linux/nfs_xdr.h
> > +++ b/include/linux/nfs_xdr.h
> > @@ -3,6 +3,7 @@
> >  
> >  #include <linux/sunrpc/xprt.h>
> >  #include <linux/nfsacl.h>
> > +#include <linux/nfs41_sessions.h>
> >  
> >  /*
> >   * To change the maximum rsize and wsize supported by the 
> NFS client, 
> > adjust @@ -778,6 +779,8 @@ struct nfs41_create_session_args {
> >  	uint32_t			use_for_backchannel;
> >  	uint32_t			use_for_rdma;
> >  	uint32_t			cb_program;
> > +	struct nfs4_channel_attrs	fc_attrs;	/* Fore 
> Channel */
> > +	struct nfs4_channel_attrs	bc_attrs;	/* Back 
> Channel */
> >  };
> >  
> >  struct nfs41_create_session_res {
> > @@ -805,6 +808,7 @@ struct nfs41_sequence_res {
> >  	u32				target_maxslots;
> >  	u32				status_flags;
> >  	struct nfs4_state_owner		*sp;
> > +	struct nfs4_slot		*slot;
> >  };
> >  
> >  struct nfs4_get_lease_time_args {
> > diff --git a/include/linux/nfsd/state.h 
> b/include/linux/nfsd/state.h 
> > index 8f699b4..fde667e 100644
> > --- a/include/linux/nfsd/state.h
> > +++ b/include/linux/nfsd/state.h
> > @@ -47,8 +47,6 @@ typedef struct {
> >  	u32             cl_id;
> >  } clientid_t;
> >  
> > -typedef unsigned char sessionid_t[16];
> > -
> >  typedef struct {
> >  	u32             so_boot;
> >  	u32             so_stateownerid;
> 


More information about the pNFS mailing list