[pnfs] [PATCH 1/2] Slot Table Implementation
Benny Halevy
bhalevy at panasas.com
Sat Mar 17 06:54:11 EDT 2007
Rahul, this is not urgent, but I really think we should move to a single
queue, for several reasons. With your implementation, when all slots
are in use you have to do max_slots test_and_set operations to
discover that. This will just not scale. I agree with your point
about semaphores and renegotiating the number of slots, so how
about this (sketch) instead:
nfs41_proc_sequence_done()
{
	/*
	 * Sketch only: the parameter list is elided here. `res` and `status`
	 * are taken to be the arguments of the real
	 * nfs41_proc_sequence_done() shown in the patch quoted below.
	 */

	/* Put the slot back on the channel's free list under the channel lock. */
	spin_lock(&res->channel->channel_lock);
	list_add(&res->slot->slot_list, &res->channel->unused_slots);
	spin_unlock(&res->channel->channel_lock);

	/*
	 * Wake up the threads waiting for a free slot on this channel.
	 * nfs4_find_slot() sleeps on sl_waitq with wait_event_interruptible(),
	 * so the queue is an ordinary wait_queue_head_t and is woken with
	 * wake_up(); rpc_wake_up() operates on a struct rpc_wait_queue.
	 */
	wake_up(&res->channel->sl_waitq);
	return status;
}
struct nfs4_slot *nfs4_find_slot(struct nfs4_channel *channel)
{
struct nfs4_slot *slot;
might_sleep();
for (;;) {
int ret = wait_event_interruptible(&channel->sl_waitq,
!list_empty(&channel->unused_slots));
if (unlikely(ret) {
BUG_ON(ret != -ERESTARTSYS);
break;
}
spin_lock(&channel->channel_lock);
if (unlikely(list_empty(&channel->unused_slots))) {
spin_unlock(&channel->channel_lock);
continue;
}
slot = list_entry(channel->unused_slots.next,
struct nfs4_slot,
slot_list);
list_del(&slot->slot_list);
spin_unlock(&channel->channel_lock);
return slot;
}
return NULL;
}
Iyer, Rahul wrote:
>Hi Benny,
>A comment inline...
>Regards
>Rahul
>
>
>
>
>>-----Original Message-----
>>From: Benny Halevy [mailto:bhalevy at panasas.com]
>>Sent: Friday, March 16, 2007 2:24 AM
>>To: Iyer, Rahul
>>Cc: pnfs at linux-nfs.org
>>Subject: Re: [pnfs] [PATCH 1/2] Slot Table Implementation
>>
>>Rahul, my comments below...
>>
>>Benny
>>
>>iyer at netapp.com wrote:
>>
>>
>>>From: iyer <iyer at netapp.com>
>>>
>>>Added code to implement a slot table.
>>>
>>>Signed-off-by: iyer <iyer at netapp.com>
>>>---
>>> fs/nfs/nfs41_sessions.h | 62 --------
>>> fs/nfs/nfs4_fs.h | 2 +-
>>> fs/nfs/nfs4proc.c | 326
>>>
>>>
>>+++++++++++++++++++++++++++++++++++-----
>>
>>
>>> fs/nfs/nfs4xdr.c | 24 ++--
>>> fs/nfs/super.c | 6 +-
>>> include/linux/nfs41_sessions.h | 72 +++++++++
>>> include/linux/nfs_xdr.h | 4 +
>>> include/linux/nfsd/state.h | 2 -
>>> 8 files changed, 378 insertions(+), 120 deletions(-) delete mode
>>>100644 fs/nfs/nfs41_sessions.h create mode 100644
>>>include/linux/nfs41_sessions.h
>>>
>>>diff --git a/fs/nfs/nfs41_sessions.h
>>>
>>>
>>b/fs/nfs/nfs41_sessions.h deleted
>>
>>
>>>file mode 100644 index fc658c5..0000000
>>>--- a/fs/nfs/nfs41_sessions.h
>>>+++ /dev/null
>>>@@ -1,62 +0,0 @@
>>>-#ifndef __NFS4_1_SESSIONS_H__
>>>-#define __NFS4_1_SESSIONS_H__
>>>-
>>>-typedef unsigned char sessionid_t[16];
>>>-typedef u32 streamchannel_attrs;
>>>-typedef u32 rdmachannel_attrs;
>>>-
>>>-struct nfs4_channel_attrs {
>>>- unsigned long max_rqst_sz;
>>>- unsigned long max_resp_sz;
>>>- unsigned long max_resp_sz_cached;
>>>- unsigned long max_ops;
>>>- unsigned long max_reqs;
>>>- streamchannel_attrs stream_attrs;
>>>- rdmachannel_attrs rdma_attrs;
>>>-};
>>>-
>>>-struct nfs4_channel {
>>>- struct nfs4_channel_attrs chan_attrs;
>>>- unsigned long nr_conns;
>>>- struct list_head rpc_clients;
>>>-};
>>>-
>>>-struct nfs4_session {
>>>- /* Session related params */
>>>- sessionid_t sess_id;
>>>- u32 seqid; /* The seqid returned
>>>
>>>
>>by exchange_id */
>>
>>
>>>- u32 persist;
>>>- u32 header_padding;
>>>- u32 hash_alg;
>>>- u32 ssv_len;
>>>- u32 use_for_back_chan;
>>>- u32 rdma_mode;
>>>-
>>>- /* Slotid management */
>>>- unsigned long nr_slots_in_use;
>>>- struct list_head slots_in_use;
>>>- struct list_head unused_slots;
>>>- struct rpc_wait_queue slot_waitq;
>>>-
>>>- /* The fore and back channel */
>>>- struct nfs4_channel fore_channel;
>>>- struct nfs4_channel back_channel;
>>>-
>>>- unsigned int expired;
>>>- struct nfs4_client * client;
>>>- struct list_head session_hashtbl;
>>>- spinlock_t session_lock;
>>>- /* To prevent races between create_session and sequence */
>>>- int mutating;
>>>- struct semaphore session_sem;
>>>- atomic_t ref_count;
>>>-};
>>>-
>>>-struct nfs4_slot {
>>>- u32 slot_nr;
>>>- u32 seq_nr;
>>>- struct nfs4_session * session;
>>>- struct list_head slot_list;
>>>-};
>>>-
>>>-#endif
>>>diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index
>>>195d757..e50657e 100644
>>>--- a/fs/nfs/nfs4_fs.h
>>>+++ b/fs/nfs/nfs4_fs.h
>>>@@ -9,7 +9,7 @@
>>> #ifndef __LINUX_FS_NFS_NFS4_FS_H
>>> #define __LINUX_FS_NFS_NFS4_FS_H
>>>
>>>-#include "nfs41_sessions.h"
>>>+#include <linux/nfs41_sessions.h>
>>>
>>> #ifdef CONFIG_NFS_V4
>>> #define NFSV4_MAX_MINORVERSION 1
>>>diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index
>>>167ff4a..28513a1 100644
>>>--- a/fs/nfs/nfs4proc.c
>>>+++ b/fs/nfs/nfs4proc.c
>>>@@ -49,6 +49,7 @@
>>> #include <linux/namei.h>
>>> #include <linux/mount.h>
>>> #include <linux/module.h>
>>>+#include <linux/bitops.h>
>>>
>>> #include "nfs4_fs.h"
>>> #include "delegation.h"
>>>@@ -221,8 +222,10 @@ static int
>>>
>>>
>>nfs41_proc_sequence_done(struct nfs4_session *session, struct nfs41_s
>>
>>
>>> unsigned long timestamp;
>>> struct nfs4_client *clp;
>>>
>>>- if (!session || !(clp = session->client))
>>>- return 0;
>>>+ if (!session || !(clp = session->client)) {
>>>+ printk(KERN_EMERG "%s is NULL!!!\n",
>>>
>>>
>>(!session)?"session":"clp");
>>
>>
>>>+ BUG();
>>>+ }
>>>
>>>
>>For performance reasons the defensive code should only be
>>active when kernel debugging is enabled. The conditional statement
>>above is always executed needlessly and is not as clean as it
>>could be because of the assignment embedded in the boolean
>>expression. Therefore, this had better be coded like this:
>>
>> BUG_ON(!session);
>> clp = session->client;
>> BUG_ON(!clp);
>>
>>
>>
>>>
>>> if (!status) {
>>> timestamp = jiffies;
>>>@@ -233,17 +236,131 @@ static int
>>>
>>>
>>nfs41_proc_sequence_done(struct nfs4_session *session, struct nfs41_s
>>
>>
>>> spin_unlock(&clp->cl_lock);
>>> }
>>>
>>>+ /* Clear the 'busy' bit on the slot that was used */
>>>+ smp_mb__before_clear_bit();
>>>+ clear_bit(NFS4_SLOT_BUSY, &res->slot->flags);
>>>+ smp_mb__after_clear_bit();
>>>+
>>>+ printk(KERN_EMERG "waking up waiters on slot %d\n",
>>>+res->slot->slot_nr);
>>>+
>>>+ /* Wake up the threads waiting on this slot */
>>>+ wake_up_bit(&res->slot->flags, NFS4_SLOT_BUSY);
>>>+
>>> return status;
>>> }
>>>
>>>+static int nfs4_wait_bit_interruptible(void *word) {
>>>+ if (signal_pending(current))
>>>+ return -ERESTARTSYS;
>>>+ schedule();
>>>+ return 0;
>>>+}
>>>+
>>>+/* Find the lowest numbered slot or sleep on the least
>>>
>>>
>>loaded slot */
>>
>>
>>>+struct nfs4_slot *nfs4_find_slot(struct nfs4_channel *channel) {
>>>+ struct nfs4_slot_table *tbl;
>>>+ struct nfs4_slot *slot;
>>>+ struct nfs4_slot *target_slot;
>>>+ u32 max_slots;
>>>+ u32 min_waiters;
>>>+ int i;
>>>+ int need_to_sleep;
>>>+
>>>+ might_sleep();
>>>+
>>>+ tbl = &channel->slot_table;
>>>+ min_waiters = tbl->slots[0].nr_waiters;
>>>
>>>
>>atomic_read()
>>
>>
>>
>>>+ target_slot = &tbl->slots[0];
>>>+
>>>+ spin_lock(&tbl->slot_tbl_lock);
>>>+ /* Make a local copy of max slots so we don't need to
>>>
>>>
>>hold it through
>>
>>
>>>+ * out.
>>>+ * XXX Will need to revalidate this if slots are reclaimed
>>>+ */
>>>+ max_slots = tbl->max_slots;
>>>+ spin_unlock(&tbl->slot_tbl_lock);
>>>
>>>
>>Using a spinlock here is unreasonably expensive when you
>>could use atomic counters since the lock overhead is not
>>amortized over multiple accesses to the structure.
>>max_slots and nr_waiters should be defined as atomic_t and
>>accessed with atomic operations. Alternatively, use
>>one spinlock for the whole channel around the outer loop.
>>
>>
>>
>>>+
>>>+ do {
>>>+ need_to_sleep = 1;
>>>+ for (i = 0; i < max_slots; ++i) {
>>>+ slot = &tbl->slots[i];
>>>+ if (!test_and_set_bit(NFS4_SLOT_BUSY,
>>>
>>>
>>&slot->flags)){
>>
>>
>>>+ /* We found an empty slot */
>>>+ target_slot = slot;
>>>+ need_to_sleep = 0;
>>>+ break;
>>>+ }
>>>+
>>>+ spin_lock(&slot->slot_lock);
>>>+ if (min_waiters > slot->nr_waiters) {
>>>+ min_waiters = slot->nr_waiters;
>>>+ target_slot = slot;
>>>+ }
>>>+ spin_unlock(&slot->slot_lock);
>>>
>>>
>>Why lock? Use atomic_read(&slot->nr_waiters). Also, if several
>>threads go through this code in parallel they will pick the
>>same target_slot, so you want to increment.
>>That's another reason to use a bigger lock.
>>
>>
>
>Using atomic_t is a bit hard when you have to do compare-and-set kinds of
>operations...
>Like for instance:
>
>If (min_waiters > slot->nr_waiters) {
> min_waiters = slot->nr_waiters;
> target_slot = slot;
>}
>
>Doing the above atomically is hard with atomic variables only because
>the whole sequence needs to be atomic. However, I can change the
>spin_lock used to rwlock_t. This should better solve the problem. Right?
>
>
>
>>>+ printk(KERN_EMERG "slot %d has busy bit
>>>
>>>
>>%s and nr_waiters is %u\n"
>>
>>
>>>+ , slot->slot_nr,
>>>+ (need_to_sleep)?"set":"unset",
>>>+ slot->nr_waiters);
>>>+ }
>>>+
>>>+ /* Check whether we need to sleep. If so, sleep
>>>
>>>
>>on the BUSY bit
>>
>>
>>>+ * Increment the nr_waiters before sleeping and
>>>
>>>
>>decrement it
>>
>>
>>>+ * when woken up
>>>+ */
>>>+ if (need_to_sleep) {
>>>+ printk(KERN_EMERG "sleeping on slot %d;
>>>
>>>
>>seq_nr: %d\n",
>>
>>
>>>+ target_slot->slot_nr,
>>>+ target_slot->seq_nr);
>>>+ spin_lock(&target_slot->slot_lock);
>>>+ ++target_slot->nr_waiters;
>>>+ spin_unlock(&target_slot->slot_lock);
>>>
>>>
>>atomic_inc()
>>
>>
>>
>>>+
>>>+ /* XXX: We need to check the return value of
>>>+ * eait_on_bit so that we can check if it's
>>>+ * interrupted.
>>>+ */
>>>+ wait_on_bit(&target_slot->flags,
>>>
>>>
>>NFS4_SLOT_BUSY,
>>
>>
>>>+ nfs4_wait_bit_interruptible,
>>>+ TASK_INTERRUPTIBLE);
>>>+
>>>+ spin_lock(&target_slot->slot_lock);
>>>+ --target_slot->nr_waiters;
>>>+ spin_unlock(&target_slot->slot_lock);
>>>
>>>
>>atomic_dec()
>>
>>But wait a second, why do you want a queue per slot to begin with?
>>It seems like you can just go with a semaphore initialized to
>>the number of slots, do down_interruptible before scanning
>>for a free slot and up in nfs41_proc_sequence_done. Take a
>>spin lock to scan the slot table (last index can be saved and
>>rotated for uniformity) and if no free slot is found do a
>>BUG() since the semaphore should prevent that from happening;
>>
>>
>>
>>
>>>+ }
>>>+ else
>>>+ break;
>>>+ }while (test_and_set_bit(NFS4_SLOT_BUSY, &target_slot->flags));
>>>+
>>>+ printk(KERN_EMERG "slot id: %u\nseqid: %u\n max_slots: %u\n",
>>>+ target_slot->slot_nr, target_slot->seq_nr,
>>>+ max_slots);
>>>+
>>>+ return target_slot;
>>>+}
>>>+
>>> static int _nfs41_proc_setup_sequence(struct nfs4_session
>>>
>>>
>>*session,
>>
>>
>>>struct nfs41_sequence_args *args, struct nfs41_sequence_res *res) {
>>> u32 *ptr;
>>>-
>>>+ struct nfs4_slot *slot;
>>>+
>>> ptr = (u32 *)session->sess_id;
>>> dprintk("%s: %u:%u:%u:%u\n", __FUNCTION__, ptr[0],
>>>
>>>
>>ptr[1], ptr[2],
>>
>>
>>>ptr[3]);
>>>
>>>- memcpy(args->sessionid, (unsigned char
>>>
>>>
>>*)session->sess_id, NFS4_MAX_SESSIONID_LEN);
>>
>>
>>>+ memcpy(args->sessionid, (unsigned char *)session->sess_id,
>>>+ NFS4_MAX_SESSIONID_LEN);
>>>+
>>>+ slot = nfs4_find_slot(&session->fore_channel);
>>>+
>>>+ /* XXX: Do we always increment this? Are there any
>>>
>>>
>>errors for which we
>>
>>
>>>+ * don't increment the sequence number?
>>>+ */
>>>+ args->seqid = slot->seq_nr++;
>>>+ args->slotid = slot->slot_nr;
>>>+ args->maxslots = session->fore_channel.slot_table.max_slots;
>>>+
>>>+ res->slot = slot;
>>>
>>> return 0;
>>> }
>>>@@ -1685,11 +1802,6 @@ static int nfs4_proc_get_root(struct
>>>
>>>
>>nfs_server *server, struct nfs_fh *fhandle,
>>
>>
>>> };
>>> int status;
>>>
>>>- if (server->rpc_ops->setup_sequence && (status =
>>>-
>>>
>>>
>>server->rpc_ops->setup_sequence(server->nfs4_state->cl_session,
>>
>>
>>>- &seqargs, &seqres)))
>>>- return status;
>>>-
>>> /*
>>> * Now we do a separate LOOKUP for each component of
>>>
>>>
>>the mount path.
>>
>>
>>> * The LOOKUPs are done separately so that we can
>>>
>>>
>>conveniently @@
>>
>>
>>>-1714,9 +1826,21 @@ static int nfs4_proc_get_root(struct nfs_server
>>>*server, struct nfs_fh *fhandle,
>>>
>>> do {
>>> nfs_fattr_init(fattr);
>>>+ if (server->rpc_ops->setup_sequence &&
>>>
>>>
>>(status =
>>
>>
>>>+ server->rpc_ops->setup_sequence(
>>>+ server->nfs4_state->cl_session,
>>>+ &seqargs, &seqres)))
>>>+ return status;
>>>+
>>> status = nfs4_handle_exception(server,
>>>
>>>
>>>
>>rpc_call_sync(server->client, &msg, 0),
>>
>>
>>> &exception);
>>>+
>>>+ if (server->rpc_ops->sequence_done)
>>>+ server->rpc_ops->sequence_done(
>>>+
>>>
>>>
>>server->nfs4_state->cl_session,
>>
>>
>>>+ &seqres, status);
>>>+
>>> } while (exception.retry);
>>> if (status == 0)
>>> continue;
>>>@@ -1731,10 +1855,6 @@ static int nfs4_proc_get_root(struct
>>>
>>>
>>nfs_server *server, struct nfs_fh *fhandle,
>>
>>
>>> if (status == 0)
>>> status = nfs4_do_fsinfo(server, fhandle, info);
>>> out:
>>>- if (server->rpc_ops->sequence_done)
>>>-
>>>
>>>
>>server->rpc_ops->sequence_done(server->nfs4_state->cl_session,
>>
>>
>>>- &seqres, status);
>>>-
>>> return nfs4_map_errors(status);
>>> }
>>>
>>>@@ -3268,6 +3388,7 @@ int nfs4_proc_async_sequence(struct
>>>
>>>
>>nfs4_client *clp, struct rpc_cred *cred)
>>
>>
>>> ret = rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
>>> &nfs4_sequence_ops, (void *)jiffies);
>>>
>>>+ nfs41_proc_sequence_done(clp->cl_session, res, ret);
>>> out:
>>> return ret;
>>> out_free:
>>>@@ -3581,14 +3702,6 @@ nfs4_async_handle_error(struct
>>>
>>>
>>rpc_task *task, const struct nfs_server *server)
>>
>>
>>> return 0;
>>> }
>>>
>>>-static int nfs4_wait_bit_interruptible(void *word) -{
>>>- if (signal_pending(current))
>>>- return -ERESTARTSYS;
>>>- schedule();
>>>- return 0;
>>>-}
>>>-
>>> static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct
>>>nfs4_client *clp) {
>>> sigset_t oldset;
>>>@@ -3823,6 +3936,111 @@ int nfs4_proc_get_lease_time(struct
>>>
>>>
>>nfs4_client *clp, struct nfs_fsinfo *fsinfo)
>>
>>
>>> return status;
>>> }
>>>
>>>+/* Initialize a slot table */
>>>+int nfs4_init_slot_table(struct nfs4_channel *channel) {
>>>+ int i;
>>>+ struct nfs4_slot_table *tbl;
>>>+ struct nfs4_slot *slot;
>>>+
>>>+ tbl = &channel->slot_table;
>>>+ tbl->max_slots = channel->chan_attrs.max_reqs;
>>>+
>>>+ tbl->slots = kzalloc(tbl->max_slots * sizeof(struct
>>>
>>>
>>nfs4_slot), GFP_ATOMIC);
>>
>>
>>>+ if (!tbl->slots)
>>>+ return -ENOMEM;
>>>+
>>>+ spin_lock_init(&tbl->slot_tbl_lock);
>>>+
>>>+ for (i = 0; i < tbl->max_slots; ++i) {
>>>+ slot = &tbl->slots[i];
>>>+
>>>+ slot->slot_nr = i;
>>>+ slot->seq_nr = 1;
>>>+ slot->flags = 0;
>>>+ slot->nr_waiters = 0;
>>>+ spin_lock_init(&slot->slot_lock);
>>>+ }
>>>+
>>>+ return 0;
>>>+}
>>>+
>>>+/* Destroy the slot table */
>>>+void nfs4_destroy_slot_table(struct nfs4_channel *channel) {
>>>+ int i;
>>>+ struct nfs4_slot *slot;
>>>+ struct nfs4_slot_table *tbl;
>>>+
>>>+ tbl = &channel->slot_table;
>>>+
>>>+ for (i = 0; i < tbl->max_slots;++i) {
>>>+ slot = &tbl->slots[i];
>>>+
>>>+ if (slot->nr_waiters)
>>>+ BUG();
>>>+ }
>>>+
>>>+ kfree(channel->slot_table.slots);
>>>+ channel->slot_table.slots = NULL;
>>>+
>>>+ return;
>>>+}
>>>+
>>>+/* dump the channel attributes */
>>>+void nfs4_dump_channel_attrs(struct nfs4_channel_attrs *attrs) {
>>>+ printk(KERN_INFO "max_rqst_sz: %u\n", attrs->max_rqst_sz);
>>>+ printk(KERN_INFO "max_resp_sz: %u\n", attrs->max_resp_sz);
>>>+ printk(KERN_INFO "max_resp_sz_cached: %u\n",
>>>
>>>
>>attrs->max_resp_sz_cached);
>>
>>
>>>+ printk(KERN_INFO "max_ops: %u\n", attrs->max_ops);
>>>+ printk(KERN_INFO "max_reqs: %u\n", attrs->max_reqs); }
>>>+
>>>+/* Initialize the values to be used by the client in
>>>
>>>
>>CREATE_SESSION
>>
>>
>>>+*/ void nfs4_init_channel_attrs(struct nfs4_client *clp,
>>>+ struct nfs4_channel_attrs *fc_attrs,
>>>+ struct nfs4_channel_attrs *bc_attrs) {
>>>+ /* XXX: We need to have good values here... 32K is a
>>>
>>>
>>wild guess */
>>
>>
>>>+ fc_attrs->max_rqst_sz = bc_attrs->max_rqst_sz = 32768;
>>>+ fc_attrs->max_resp_sz = bc_attrs->max_resp_sz = 32768;
>>>+ fc_attrs->max_resp_sz_cached =
>>>
>>>
>>bc_attrs->max_resp_sz_cached = 32768;
>>
>>
>>>+ fc_attrs->max_ops = bc_attrs->max_ops = 0xFFFFFFFF;
>>>+ fc_attrs->max_reqs = bc_attrs->max_reqs =
>>>+ clp->cl_rpcclient->cl_xprt->max_reqs;
>>>+ fc_attrs->stream_attrs = bc_attrs->stream_attrs = 0;
>>>+ fc_attrs->rdma_attrs = bc_attrs->rdma_attrs = 0;
>>>+
>>>+}
>>>+
>>>+/* Check the values returned by the server for
>>>
>>>
>>CREATE_SESSION. Since
>>
>>
>>>+we made
>>>+ * our needs known, if the server gives us more than we need, we
>>>+don't bother
>>>+ * with it.
>>>+ */
>>>+void nfs4_adjust_channel_attrs(struct nfs4_channel_attrs
>>>
>>>
>>*req_attrs,
>>
>>
>>>+ struct nfs4_channel_attrs *resp_attrs) {
>>>+ if (req_attrs->max_rqst_sz < resp_attrs->max_rqst_sz)
>>>+ resp_attrs->max_rqst_sz = req_attrs->max_rqst_sz;
>>>+
>>>+ if (req_attrs->max_resp_sz < resp_attrs->max_resp_sz)
>>>+ resp_attrs->max_resp_sz = req_attrs->max_resp_sz;
>>>+
>>>+ if (req_attrs->max_resp_sz_cached < resp_attrs->max_resp_sz)
>>>+ resp_attrs->max_resp_sz = req_attrs->max_resp_sz_cached;
>>>+
>>>+ if (req_attrs->max_ops < resp_attrs->max_ops)
>>>+ resp_attrs->max_ops = req_attrs->max_ops;
>>>+
>>>+ if (req_attrs->max_reqs < resp_attrs->max_reqs)
>>>+ resp_attrs->max_reqs = req_attrs->max_reqs;
>>>+
>>>+ /* XXX: We ignore the stream channel attributes... we
>>>
>>>
>>have no idea what
>>
>>
>>>+ * to do with them anyways!
>>>+ */
>>>+}
>>>+
>>> int _nfs4_proc_create_session(struct nfs4_client *clp, struct
>>>nfs4_session *session, struct rpc_clnt *clnt) {
>>> struct nfs41_create_session_args args = { @@ -3845,7
>>>
>>>
>>+4063,19 @@ int
>>
>>
>>>_nfs4_proc_create_session(struct nfs4_client *clp, struct
>>>
>>>
>>nfs4_session *sess
>>
>>
>>> };
>>> int status;
>>>
>>>- status = rpc_call_sync(clnt, &msg, 0);
>>>+ nfs4_init_channel_attrs(clp, &args.fc_attrs, &args.bc_attrs);
>>>+
>>>+ status = rpc_call_sync(clnt, &msg, 0);
>>>+
>>>+ /* Set the negotiated values in the session's
>>>
>>>
>>channel_attrs struct
>>
>>
>>>+*/
>>>+
>>>+ if (!status) {
>>>+ nfs4_adjust_channel_attrs(&args.fc_attrs,
>>>+
>>>
>>>
>>&session->fore_channel.chan_attrs);
>>
>>
>>>+ nfs4_adjust_channel_attrs(&args.bc_attrs,
>>>+
>>>
>>>
>>&session->back_channel.chan_attrs);
>>
>>
>>>+ }
>>>+
>>> return status;
>>> }
>>> EXPORT_SYMBOL(_nfs4_proc_create_session);
>>>@@ -3853,29 +4083,28 @@ EXPORT_SYMBOL(_nfs4_proc_create_session);
>>> int nfs4_proc_create_session(struct nfs4_client *clp) {
>>> int status;
>>>- unsigned long now;
>>> struct nfs4_session *session;
>>>- struct nfs_fsinfo fsinfo;
>>> u32 *ptr;
>>>
>>>- now = jiffies;
>>>-
>>> status = _nfs4_proc_create_session(clp,
>>>
>>>
>>clp->cl_session, clp->cl_rpcclient);
>>
>>
>>> if (status)
>>> return status;
>>>
>>>- status = nfs4_proc_get_lease_time(clp, &fsinfo);
>>>- if (status == 0) {
>>>- /* Update lease time and schedule renewal */
>>>- spin_lock(&clp->cl_lock);
>>>- clp->cl_lease_time = fsinfo.lease_time * HZ;
>>>- clp->cl_last_renewal = now;
>>>- clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
>>>- spin_unlock(&clp->cl_lock);
>>>+ session = clp->cl_session;
>>>
>>>- nfs4_schedule_state_renewal(clp);
>>>+ /* Init the fore channel */
>>>+ status = nfs4_init_slot_table(&session->fore_channel);
>>>+ dprintk("fc init returned %d\n", status);
>>>+ if (status)
>>>+ return status;
>>>+
>>>+ /* Init the back channel */
>>>+ status = nfs4_init_slot_table(&session->back_channel);
>>>+ dprintk("bc init returned %d\n", status);
>>>+ if (status) {
>>>+ nfs4_destroy_slot_table(&session->fore_channel);
>>>+ return status;
>>> }
>>>- session = clp->cl_session;
>>>
>>> ptr = (int *)session->sess_id;
>>> dprintk("sessionid is: %d:%d:%d:%d\n", ptr[0], ptr[1], ptr[2],
>>>ptr[3]); @@ -4769,11 +4998,8 @@ struct nfs4_session
>>>*nfs41_alloc_session(void)
>>>
>>> session->expired = 1;
>>>
>>>- INIT_LIST_HEAD(&session->slots_in_use);
>>>- INIT_LIST_HEAD(&session->unused_slots);
>>> INIT_LIST_HEAD(&session->session_hashtbl);
>>>
>>>- //rpc_init_wait_queue(&session->slot_waitq, "Slot waitqueue");
>>> spin_lock_init(&session->session_lock);
>>>
>>> sema_init(&session->session_sem, 1); @@ -4797,6 +5023,8 @@ void
>>>nfs4_get_session(struct nfs4_session *session) void
>>>nfs4_put_session(struct nfs4_session **session) {
>>> if (atomic_dec_and_test(&((*session)->ref_count))) {
>>>+ nfs4_destroy_slot_table(&((*session)->fore_channel));
>>>+ nfs4_destroy_slot_table(&((*session)->back_channel));
>>> nfs41_free_session(*session);
>>> *session = NULL;
>>> }
>>>@@ -4806,7 +5034,11 @@ int nfs41_proc_setup_session(struct
>>>
>>>
>>nfs4_client
>>
>>
>>>*clp) {
>>> int status;
>>>
>>>- dprintk("in %s!\n", __FUNCTION__);
>>>+ struct nfs_fsinfo fsinfo;
>>>+ unsigned long now;
>>>+
>>>+ now = jiffies;
>>>+
>>> if (!clp->cl_session) {
>>> /* create the session struct to hold the
>>>
>>>
>>session parameters */
>>
>>
>>> clp->cl_session = nfs41_alloc_session(); @@
>>>
>>>
>>-4844,6 +5076,20 @@ int
>>
>>
>>>nfs41_proc_setup_session(struct nfs4_client *clp)
>>> clp->cl_session->expired = 0;
>>> clp->cl_session->client = clp;
>>>
>>>+ status = nfs4_proc_get_lease_time(clp, &fsinfo);
>>>+
>>>+ if (status)
>>>+ goto out_free;
>>>+
>>>+ /* Update lease time and schedule renewal */
>>>+ spin_lock(&clp->cl_lock);
>>>+ clp->cl_lease_time = fsinfo.lease_time * HZ;
>>>+ clp->cl_last_renewal = now;
>>>+ clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
>>>+ spin_unlock(&clp->cl_lock);
>>>+
>>>+ nfs4_schedule_state_renewal(clp);
>>>+
>>> clp->cl_session->mutating = 0;
>>> out:
>>> up(&clp->cl_session->session_sem);
>>>diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index
>>>5ef6d70..7da9a6a 100644
>>>--- a/fs/nfs/nfs4xdr.c
>>>+++ b/fs/nfs/nfs4xdr.c
>>>@@ -1429,20 +1429,22 @@ static int
>>>
>>>
>>encode_create_session(struct xdr_stream *xdr, struct nfs41_create_ses
>>
>>
>>> RESERVE_SPACE(2*28); /* 2 channel_attrs */
>>>
>>> /* Fore Channel */
>>>- WRITE32(32768); /* max req size */
>>>- WRITE32(32768); /* max resp size */
>>>- WRITE32(32768); /* Max resp
>>>
>>>
>>size cached */
>>
>>
>>>- WRITE32(RPC_DEF_SLOT_TABLE); /* max operations */
>>>- WRITE32(RPC_DEF_SLOT_TABLE); /* max requests */
>>>+ WRITE32(args->fc_attrs.max_rqst_sz); /* max req size */
>>>+ WRITE32(args->fc_attrs.max_resp_sz); /* max resp size */
>>>+ WRITE32(args->fc_attrs.max_resp_sz_cached); /* Max
>>>
>>>
>>resp sz cached */
>>
>>
>>>+ WRITE32(args->fc_attrs.max_ops); /* max operations */
>>>+ WRITE32(args->fc_attrs.max_reqs); /* max requests */
>>>+
>>> WRITE32(0); /*
>>>
>>>
>>Streamchannel attrs */
>>
>>
>>> WRITE32(0); /*rdmachannel_attrs */
>>>
>>> /* Back Channel */
>>>- WRITE32(32768); /* max req size */
>>>- WRITE32(32768); /* max resp size */
>>>- WRITE32(32768); /* Max resp
>>>
>>>
>>size cached */
>>
>>
>>>- WRITE32(RPC_DEF_SLOT_TABLE); /* max operations */
>>>- WRITE32(RPC_DEF_SLOT_TABLE); /* max requests */
>>>+ WRITE32(args->bc_attrs.max_rqst_sz); /* max req size */
>>>+ WRITE32(args->bc_attrs.max_resp_sz); /* max resp size */
>>>+ WRITE32(args->bc_attrs.max_resp_sz_cached); /* Max
>>>
>>>
>>resp sz cached */
>>
>>
>>>+ WRITE32(args->bc_attrs.max_ops); /* max operations */
>>>+ WRITE32(args->bc_attrs.max_reqs); /* max requests */
>>>+
>>> WRITE32(0); /*
>>>
>>>
>>Streamchannel attrs */
>>
>>
>>> WRITE32(0); /*rdmachannel_attrs */
>>>
>>>@@ -6404,7 +6406,7 @@ static int
>>>
>>>
>>nfs41_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p, str
>>
>>
>>> status = decode_putrootfh(&xdr);
>>> if (!status)
>>> status = decode_fsinfo(&xdr, res->fsinfo);
>>>- if (!status)
>>>+ if (status)
>>> status = -nfs_stat_to_errno(hdr.status);
>>> return status;
>>> }
>>>diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a3bc630..89eb564
>>>100644
>>>--- a/fs/nfs/super.c
>>>+++ b/fs/nfs/super.c
>>>@@ -1260,15 +1260,13 @@ static int nfs4_fill_super(struct
>>>
>>>
>>super_block *sb, struct nfs4_mount_data *data,
>>
>>
>>> server->nfs4_state->cl_minorversion = i;
>>>
>>> if (server->rpc_ops->setup_session) {
>>>- int status;
>>>-
>>> lock_kernel();
>>> down_write(&server->nfs4_state->cl_sem);
>>>- status =
>>>
>>>
>>server->rpc_ops->setup_session(server->nfs4_state);
>>
>>
>>>+ err =
>>>+ server->rpc_ops->setup_session(server->nfs4_state);
>>> up_write(&server->nfs4_state->cl_sem);
>>> unlock_kernel();
>>>
>>>- if (status) {
>>>+ if (err) {
>>> printk(KERN_EMERG
>>>
>>>
>>"Couldn't mount using minorversion %d\n", i);
>>
>>
>>>
>>>
>>>
>>rpc_shutdown_client(server->client);
>>
>>
>>> }
>>>diff --git a/include/linux/nfs41_sessions.h
>>>b/include/linux/nfs41_sessions.h new file mode 100644 index
>>>0000000..02f45db
>>>--- /dev/null
>>>+++ b/include/linux/nfs41_sessions.h
>>>@@ -0,0 +1,72 @@
>>>+#ifndef __NFS4_1_SESSIONS_H__
>>>+#define __NFS4_1_SESSIONS_H__
>>>+
>>>+/* The flags for the nfs4_slot struct */
>>>+#define NFS4_SLOT_BUSY 0X0 /* Slot in use */
>>>+#define NFS4_SLOT_RECLAIMED 0x1 /* Slot has
>>>
>>>
>>been reclaimed by
>>
>>
>>>+ the server */
>>>+
>>>+typedef unsigned char sessionid_t[16];
>>>+typedef u32 streamchannel_attrs;
>>>+typedef u32 rdmachannel_attrs;
>>>+
>>>+struct nfs4_channel_attrs {
>>>+ u32 max_rqst_sz;
>>>+ u32 max_resp_sz;
>>>+ u32 max_resp_sz_cached;
>>>+ u32 max_ops;
>>>+ u32 max_reqs;
>>>+ streamchannel_attrs stream_attrs;
>>>+ rdmachannel_attrs rdma_attrs;
>>>+};
>>>+
>>>+struct nfs4_slot {
>>>+ u32 slot_nr;
>>>+ u32 seq_nr;
>>>+ unsigned long flags;
>>>+ u32 nr_waiters;
>>>+ spinlock_t slot_lock;
>>>+};
>>>+
>>>+struct nfs4_slot_table {
>>>+ struct nfs4_slot *slots;
>>>+ u32 max_slots;
>>>+ spinlock_t slot_tbl_lock;
>>>+};
>>>+
>>>+struct nfs4_channel {
>>>+ struct nfs4_channel_attrs chan_attrs;
>>>+ struct rpc_clnt *rpc_client;
>>>+ struct nfs4_slot_table slot_table;
>>>+};
>>>+
>>>+struct nfs4_session {
>>>+ /* Session related params */
>>>+ sessionid_t sess_id;
>>>+ u32 seqid; /* The seqid
>>>
>>>
>>returned by
>>
>>
>>>+ exchange_id */
>>>+ u32 persist;
>>>+ u32 header_padding;
>>>+ u32 hash_alg;
>>>+ u32 ssv_len;
>>>+ u32 use_for_back_chan;
>>>+ u32 rdma_mode;
>>>+
>>>+ /* The fore and back channel */
>>>+ struct nfs4_channel fore_channel;
>>>+ struct nfs4_channel back_channel;
>>>+
>>>+ unsigned int expired;
>>>+ struct nfs4_client * client;
>>>+ struct list_head session_hashtbl;
>>>+ spinlock_t session_lock;
>>>+ /* To prevent races between create_session and sequence */
>>>+ int mutating;
>>>+ struct semaphore session_sem;
>>>+ atomic_t ref_count;
>>>+};
>>>+
>>>+
>>>+#endif
>>>+
>>>+
>>>diff --git a/include/linux/nfs_xdr.h
>>>
>>>
>>b/include/linux/nfs_xdr.h index
>>
>>
>>>4648707..52b9e38 100644
>>>--- a/include/linux/nfs_xdr.h
>>>+++ b/include/linux/nfs_xdr.h
>>>@@ -3,6 +3,7 @@
>>>
>>> #include <linux/sunrpc/xprt.h>
>>> #include <linux/nfsacl.h>
>>>+#include <linux/nfs41_sessions.h>
>>>
>>> /*
>>> * To change the maximum rsize and wsize supported by the
>>>
>>>
>>NFS client,
>>
>>
>>>adjust @@ -778,6 +779,8 @@ struct nfs41_create_session_args {
>>> uint32_t use_for_backchannel;
>>> uint32_t use_for_rdma;
>>> uint32_t cb_program;
>>>+ struct nfs4_channel_attrs fc_attrs; /* Fore
>>>
>>>
>>Channel */
>>
>>
>>>+ struct nfs4_channel_attrs bc_attrs; /* Back
>>>
>>>
>>Channel */
>>
>>
>>> };
>>>
>>> struct nfs41_create_session_res {
>>>@@ -805,6 +808,7 @@ struct nfs41_sequence_res {
>>> u32 target_maxslots;
>>> u32 status_flags;
>>> struct nfs4_state_owner *sp;
>>>+ struct nfs4_slot *slot;
>>> };
>>>
>>> struct nfs4_get_lease_time_args {
>>>diff --git a/include/linux/nfsd/state.h
>>>
>>>
>>b/include/linux/nfsd/state.h
>>
>>
>>>index 8f699b4..fde667e 100644
>>>--- a/include/linux/nfsd/state.h
>>>+++ b/include/linux/nfsd/state.h
>>>@@ -47,8 +47,6 @@ typedef struct {
>>> u32 cl_id;
>>> } clientid_t;
>>>
>>>-typedef unsigned char sessionid_t[16];
>>>-
>>> typedef struct {
>>> u32 so_boot;
>>> u32 so_stateownerid;
>>>
>>>
More information about the pNFS
mailing list