fs/nfs/inode.c | 9 +- fs/nfs/nfs4proc.c | 86 ++++++++++++++++----- fs/nfs/nfs4state.c | 200 ++++++++++++++++++------------------------------- include/linux/nfs_fs.h | 10 -- 4 files changed, 153 insertions(+), 152 deletions(-) diff -u --recursive --new-file --show-c-function linux-2.6.7-24-nfs4_exception/fs/nfs/inode.c linux-2.6.7-25-reboot_recovery/fs/nfs/inode.c --- linux-2.6.7-24-nfs4_exception/fs/nfs/inode.c 2004-07-02 09:41:21.000000000 -0400 +++ linux-2.6.7-25-reboot_recovery/fs/nfs/inode.c 2004-07-02 13:44:03.000000000 -0400 @@ -1536,8 +1536,13 @@ static int nfs4_fill_super(struct super_ memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); nfs_idmap_new(clp); } - if (list_empty(&clp->cl_superblocks)) - clear_bit(NFS4CLNT_OK, &clp->cl_state); + if (list_empty(&clp->cl_superblocks)) { + err = nfs4_init_client(clp); + if (err != 0) { + up_write(&clp->cl_sem); + goto out_fail; + } + } list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); clnt = rpc_clone_client(clp->cl_rpcclient); if (!IS_ERR(clnt)) diff -u --recursive --new-file --show-c-function linux-2.6.7-24-nfs4_exception/fs/nfs/nfs4proc.c linux-2.6.7-25-reboot_recovery/fs/nfs/nfs4proc.c --- linux-2.6.7-24-nfs4_exception/fs/nfs/nfs4proc.c 2004-07-02 13:43:36.000000000 -0400 +++ linux-2.6.7-25-reboot_recovery/fs/nfs/nfs4proc.c 2004-07-02 13:45:51.000000000 -0400 @@ -254,6 +254,7 @@ static int _nfs4_do_open(struct inode *d struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_client *clp = server->nfs4_state; struct inode *inode = NULL; int status; struct nfs_fattr f_attr = { @@ -279,6 +280,8 @@ static int _nfs4_do_open(struct inode *d .rpc_cred = cred, }; + /* Protect against reboot recovery conflicts */ + down_read(&clp->cl_sem); status = -ENOMEM; if (!(sp = nfs4_get_state_owner(server, cred))) { dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); @@ -342,15 +345,18 @@ static int _nfs4_do_open(struct inode *d up(&sp->so_sema); nfs4_put_state_owner(sp); + up_read(&clp->cl_sem); *res = state; return 0; out_err: if (sp != NULL) { + if (state != NULL) + nfs4_put_open_state(state); up(&sp->so_sema); nfs4_put_state_owner(sp); } - if (state != NULL) - nfs4_put_open_state(state); + /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */ + up_read(&clp->cl_sem); if (inode != NULL) iput(inode); *res = NULL; @@ -446,7 +452,7 @@ int nfs4_do_setattr(struct nfs_server *s * * NOTE: Caller must be holding the sp->so_owner semaphore! */ -int _nfs4_do_close(struct inode *inode, struct nfs4_state *state) +static int _nfs4_do_close(struct inode *inode, struct nfs4_state *state) { struct nfs4_state_owner *sp = state->owner; int status = 0; @@ -475,7 +481,27 @@ int _nfs4_do_close(struct inode *inode, return status; } -int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) +int nfs4_do_close(struct inode *inode, struct nfs4_state *state) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs4_do_close(inode, state); + switch (err) { + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + nfs4_schedule_state_recovery(server->nfs4_state); + case 0: + state->state = 0; + return 0; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) { struct nfs4_state_owner *sp = state->owner; int status = 0; @@ -500,6 +526,26 @@ int _nfs4_do_downgrade(struct inode *ino return status; } +int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs4_do_downgrade(inode, state, mode); + switch (err) { + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + nfs4_schedule_state_recovery(server->nfs4_state); + case 0: + state->state = mode; + return 0; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + struct inode * nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { @@ -1829,6 +1875,8 @@ nfs4_async_handle_error(struct rpc_task case -NFS4ERR_EXPIRED: rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL); nfs4_schedule_state_recovery(clp); + if (test_bit(NFS4CLNT_OK, &clp->cl_state)) + rpc_wake_up_task(task); task->tk_status = 0; return -EAGAIN; case -NFS4ERR_GRACE: @@ -1844,12 +1892,11 @@ nfs4_async_handle_error(struct rpc_task return 0; } -int -nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) +int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) { DEFINE_WAIT(wait); sigset_t oldset; - int interruptible, res; + int interruptible, res = 0; might_sleep(); @@ -1857,19 +1904,12 @@ nfs4_wait_clnt_recover(struct rpc_clnt * interruptible = TASK_UNINTERRUPTIBLE; if (clnt->cl_intr) interruptible = TASK_INTERRUPTIBLE; - do { - res = 0; - prepare_to_wait(&clp->cl_waitq, &wait, interruptible); - nfs4_schedule_state_recovery(clp); - if (test_bit(NFS4CLNT_OK, &clp->cl_state) && - !test_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state)) - break; - if (clnt->cl_intr && signalled()) { - res = -ERESTARTSYS; - break; - } + prepare_to_wait(&clp->cl_waitq, &wait, interruptible); + nfs4_schedule_state_recovery(clp); + if (clnt->cl_intr && signalled()) + res = -ERESTARTSYS; + else if (!test_bit(NFS4CLNT_OK, &clp->cl_state)) schedule(); - } while(!test_bit(NFS4CLNT_OK, &clp->cl_state)); finish_wait(&clp->cl_waitq, &wait); rpc_clnt_sigunmask(clnt, &oldset); return res; @@ -2072,6 +2112,7 @@ static int _nfs4_proc_getlk(struct nfs4_ struct nfs4_lock_state *lsp; int status; + down_read(&clp->cl_sem); nlo.clientid = clp->cl_clientid; down(&state->lock_sema); lsp = nfs4_find_lock_state(state, request->fl_owner); @@ -2105,6 +2146,7 @@ static int _nfs4_proc_getlk(struct nfs4_ if (lsp) nfs4_put_lock_state(lsp); up(&state->lock_sema); + up_read(&clp->cl_sem); return status; } @@ -2125,6 +2167,7 @@ static int _nfs4_proc_unlck(struct nfs4_ { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_client *clp = server->nfs4_state; struct nfs_lockargs arg = { .fh = NFS_FH(inode), .type = nfs4_lck_type(cmd, request), @@ -2144,6 +2187,7 @@ static int _nfs4_proc_unlck(struct nfs4_ struct nfs_locku_opargs luargs; int status = 0; + down_read(&clp->cl_sem); down(&state->lock_sema); lsp = nfs4_find_lock_state(state, request->fl_owner); if (!lsp) @@ -2164,6 +2208,7 @@ out: up(&state->lock_sema); if (status == 0) posix_lock_file(request->fl_file, request); + up_read(&clp->cl_sem); return status; } @@ -2184,6 +2229,7 @@ static int _nfs4_proc_setlk(struct nfs4_ { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_client *clp = server->nfs4_state; struct nfs4_lock_state *lsp; struct nfs_lockargs arg = { .fh = NFS_FH(inode), @@ -2205,6 +2251,7 @@ static int _nfs4_proc_setlk(struct nfs4_ }; int status; + down_read(&clp->cl_sem); down(&state->lock_sema); lsp = nfs4_find_lock_state(state, request->fl_owner); if (lsp == NULL) { @@ -2258,6 +2305,7 @@ out: if (posix_lock_file_wait(request->fl_file, request) < 0) printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); } + up_read(&clp->cl_sem); return status; } diff -u --recursive --new-file --show-c-function linux-2.6.7-24-nfs4_exception/fs/nfs/nfs4state.c linux-2.6.7-25-reboot_recovery/fs/nfs/nfs4state.c --- linux-2.6.7-24-nfs4_exception/fs/nfs/nfs4state.c 2004-07-02 13:36:04.000000000 -0400 +++ linux-2.6.7-25-reboot_recovery/fs/nfs/nfs4state.c 2004-07-02 13:44:03.000000000 -0400 @@ -106,7 +106,7 @@ nfs4_alloc_client(struct in_addr *addr) INIT_LIST_HEAD(&clp->cl_superblocks); init_waitqueue_head(&clp->cl_waitq); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); - clp->cl_state = 1 << NFS4CLNT_NEW; + clp->cl_state = 1 << NFS4CLNT_OK; } return clp; } @@ -169,6 +169,16 @@ nfs4_put_client(struct nfs4_client *clp) nfs4_free_client(clp); } +int nfs4_init_client(struct nfs4_client *clp) +{ + int status = nfs4_proc_setclientid(clp, 0, 0); + if (status == 0) + status = nfs4_proc_setclientid_confirm(clp); + if (status == 0) + nfs4_schedule_state_renewal(clp); + return status; +} + u32 nfs4_alloc_lockowner_id(struct nfs4_client *clp) { @@ -185,7 +195,6 @@ nfs4_client_grab_unused(struct nfs4_clie atomic_inc(&sp->so_count); sp->so_cred = cred; list_move(&sp->so_list, &clp->cl_state_owners); - sp->so_generation = clp->cl_generation; clp->cl_nunused--; } return sp; @@ -237,8 +246,11 @@ nfs4_unhash_state_owner(struct nfs4_stat spin_unlock(&clp->cl_lock); } -struct nfs4_state_owner * -nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) +/* + * Note: must be called with clp->cl_sem held in order to prevent races + * with reboot recovery! + */ +struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) { struct nfs4_client *clp = server->nfs4_state; struct nfs4_state_owner *sp, *new; @@ -254,23 +266,23 @@ nfs4_get_state_owner(struct nfs_server * new->so_client = clp; new->so_id = nfs4_alloc_lockowner_id(clp); new->so_cred = cred; - new->so_generation = clp->cl_generation; sp = new; new = NULL; } spin_unlock(&clp->cl_lock); if (new) kfree(new); - if (sp) { - if (!test_bit(NFS4CLNT_OK, &clp->cl_state)) - nfs4_wait_clnt_recover(server->client, clp); - } else - put_rpccred(cred); - return sp; + if (sp != NULL) + return sp; + put_rpccred(cred); + return NULL; } -void -nfs4_put_state_owner(struct nfs4_state_owner *sp) +/* + * Must be called with clp->cl_sem held in order to avoid races + * with state recovery... + */ +void nfs4_put_state_owner(struct nfs4_state_owner *sp) { struct nfs4_client *clp = sp->so_client; struct rpc_cred *cred = sp->so_cred; @@ -330,8 +342,6 @@ __nfs4_find_state(struct inode *inode, s continue; if ((state->state & mode) != mode) continue; - /* Add the state to the head of the inode's list */ - list_move(&state->inode_states, &nfsi->open_states); atomic_inc(&state->count); if (mode & FMODE_READ) state->nreaders++; @@ -353,8 +363,6 @@ __nfs4_find_state_byowner(struct inode * if (state->nreaders == 0 && state->nwriters == 0) continue; if (state->owner == owner) { - /* Add the state to the head of the inode's list */ - list_move(&state->inode_states, &nfsi->open_states); atomic_inc(&state->count); return state; } @@ -411,55 +419,40 @@ out: return state; } -static void -__nfs4_put_open_state(struct nfs4_state *state) +/* + * Beware! Caller must be holding exactly one + * reference to clp->cl_sem and owner->so_sema! + */ +void nfs4_put_open_state(struct nfs4_state *state) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - struct nfs4_exception exception = { }; - int status = 0; - if (!atomic_dec_and_lock(&state->count, &inode->i_lock)) { - up(&owner->so_sema); + if (!atomic_dec_and_lock(&state->count, &inode->i_lock)) return; - } if (!list_empty(&state->inode_states)) list_del(&state->inode_states); spin_unlock(&inode->i_lock); list_del(&state->open_states); - if (state->state != 0) { - for (;;) { - status = _nfs4_do_close(inode, state); - up(&owner->so_sema); - if (!status) - break; - status = nfs4_handle_exception(NFS_SERVER(inode), status, &exception); - if (!exception.retry) - break; - down(&owner->so_sema); - } - } else - up(&owner->so_sema); + BUG_ON (state->state != 0); nfs4_free_open_state(state); nfs4_put_state_owner(owner); } -void -nfs4_put_open_state(struct nfs4_state *state) -{ - down(&state->owner->so_sema); - __nfs4_put_open_state(state); -} - -void -nfs4_close_state(struct nfs4_state *state, mode_t mode) +/* + * Beware! Caller must be holding no references to clp->cl_sem! + * of owner->so_sema! + */ +void nfs4_close_state(struct nfs4_state *state, mode_t mode) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - struct nfs4_exception exception = { }; + struct nfs4_client *clp = owner->so_client; int newstate; int status = 0; + atomic_inc(&owner->so_count); + down_read(&clp->cl_sem); down(&owner->so_sema); /* Protect against nfs4_find_state() */ spin_lock(&inode->i_lock); @@ -470,29 +463,24 @@ nfs4_close_state(struct nfs4_state *stat if (state->nwriters == 0 && state->nreaders == 0) list_del_init(&state->inode_states); spin_unlock(&inode->i_lock); - do { - newstate = 0; - if (state->state == 0) - break; + newstate = 0; + if (state->state != 0) { if (state->nreaders) newstate |= FMODE_READ; if (state->nwriters) newstate |= FMODE_WRITE; if (state->state == newstate) - break; + goto out; if (newstate != 0) - status = _nfs4_do_downgrade(inode, state, newstate); + status = nfs4_do_downgrade(inode, state, newstate); else - status = _nfs4_do_close(inode, state); - if (!status) { - state->state = newstate; - break; - } - up(&owner->so_sema); - status = nfs4_handle_exception(NFS_SERVER(inode), status, &exception); - down(&owner->so_sema); - } while (exception.retry); - __nfs4_put_open_state(state); + status = nfs4_do_close(inode, state); + } +out: + nfs4_put_open_state(state); + up(&owner->so_sema); + nfs4_put_state_owner(owner); + up_read(&clp->cl_sem); } /* @@ -571,10 +559,9 @@ nfs4_copy_stateid(nfs4_stateid *dst, str } /* -* Called with state->lock_sema held. +* Called with state->lock_sema and clp->cl_sem held. */ -void -nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) { if (status == NFS_OK || seqid_mutating_err(-status)) lsp->ls_seqid++; @@ -661,14 +648,13 @@ nfs4_put_lock_state(struct nfs4_lock_sta } /* -* Called with sp->so_sema held. +* Called with sp->so_sema and clp->cl_sem held. * * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or * failed with a seqid incrementing error - * see comments nfs_fs.h:seqid_mutating_error() */ -void -nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) +void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) { if (status == NFS_OK || seqid_mutating_err(-status)) sp->so_seqid++; @@ -697,21 +683,14 @@ nfs4_recover_state(void *data) init_completion(&args.complete); - down_read(&clp->cl_sem); - if (test_and_set_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state)) - goto out_failed; if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0) goto out_failed_clear; wait_for_completion(&args.complete); return; out_failed_clear: - smp_mb__before_clear_bit(); - clear_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state); - smp_mb__after_clear_bit(); + set_bit(NFS4CLNT_OK, &clp->cl_state); wake_up_all(&clp->cl_waitq); rpc_wake_up(&clp->cl_rpcwaitq); -out_failed: - up_read(&clp->cl_sem); } /* @@ -722,10 +701,8 @@ nfs4_schedule_state_recovery(struct nfs4 { if (!clp) return; - smp_mb__before_clear_bit(); - clear_bit(NFS4CLNT_OK, &clp->cl_state); - smp_mb__after_clear_bit(); - schedule_work(&clp->cl_recoverd); + if (test_and_clear_bit(NFS4CLNT_OK, &clp->cl_state)) + schedule_work(&clp->cl_recoverd); } static int @@ -766,75 +743,50 @@ out_err: return status; } -static int -reclaimer(void *ptr) +static int reclaimer(void *ptr) { struct reclaimer_args *args = (struct reclaimer_args *)ptr; struct nfs4_client *clp = args->clp; struct nfs4_state_owner *sp; - int generation; int status; daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr)); allow_signal(SIGKILL); + atomic_inc(&clp->cl_count); complete(&args->complete); + /* Ensure exclusive access to NFSv4 state */ + down_write(&clp->cl_sem); /* Are there any NFS mounts out there? */ if (list_empty(&clp->cl_superblocks)) goto out; - if (!test_bit(NFS4CLNT_NEW, &clp->cl_state)) { - status = nfs4_proc_renew(clp); - if (status == 0) { - set_bit(NFS4CLNT_OK, &clp->cl_state); - goto out; - } - } - status = nfs4_proc_setclientid(clp, 0, 0); - if (status) - goto out_error; - status = nfs4_proc_setclientid_confirm(clp); +restart_loop: + status = nfs4_proc_renew(clp); + if (status == 0) + goto out; + status = nfs4_init_client(clp); if (status) goto out_error; - generation = ++(clp->cl_generation); - clear_bit(NFS4CLNT_NEW, &clp->cl_state); - set_bit(NFS4CLNT_OK, &clp->cl_state); - up_read(&clp->cl_sem); - nfs4_schedule_state_renewal(clp); -restart_loop: - spin_lock(&clp->cl_lock); + /* Note: list is protected by exclusive lock on cl->cl_sem */ list_for_each_entry(sp, &clp->cl_state_owners, so_list) { - if (sp->so_generation - generation >= 0) - continue; - atomic_inc(&sp->so_count); - spin_unlock(&clp->cl_lock); - down(&sp->so_sema); - if (sp->so_generation - generation < 0) { - smp_rmb(); - sp->so_generation = clp->cl_generation; - status = nfs4_reclaim_open_state(sp); - } - up(&sp->so_sema); - nfs4_put_state_owner(sp); + status = nfs4_reclaim_open_state(sp); if (status < 0) { if (status == -NFS4ERR_STALE_CLIENTID) - nfs4_schedule_state_recovery(clp); - goto out; + goto restart_loop; + goto out_error; } - goto restart_loop; } - spin_unlock(&clp->cl_lock); out: - smp_mb__before_clear_bit(); - clear_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state); - smp_mb__after_clear_bit(); + set_bit(NFS4CLNT_OK, &clp->cl_state); + up_write(&clp->cl_sem); wake_up_all(&clp->cl_waitq); rpc_wake_up(&clp->cl_rpcwaitq); + nfs4_put_client(clp); return 0; out_error: - printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u\n", - NIPQUAD(clp->cl_addr.s_addr)); - up_read(&clp->cl_sem); + printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", + NIPQUAD(clp->cl_addr.s_addr), -status); goto out; } diff -u --recursive --new-file --show-c-function linux-2.6.7-24-nfs4_exception/include/linux/nfs_fs.h linux-2.6.7-25-reboot_recovery/include/linux/nfs_fs.h --- linux-2.6.7-24-nfs4_exception/include/linux/nfs_fs.h 2004-07-02 13:36:04.000000000 -0400 +++ linux-2.6.7-25-reboot_recovery/include/linux/nfs_fs.h 2004-07-02 13:44:03.000000000 -0400 @@ -506,8 +506,6 @@ struct idmap; enum nfs4_client_state { NFS4CLNT_OK = 0, - NFS4CLNT_NEW, - NFS4CLNT_SETUP_STATE, }; /* @@ -519,7 +517,6 @@ struct nfs4_client { u64 cl_clientid; /* constant */ nfs4_verifier cl_confirm; unsigned long cl_state; - long cl_generation; u32 cl_lockowner_id; @@ -572,9 +569,7 @@ struct nfs4_state_owner { u32 so_id; /* 32-bit identifier, unique */ struct semaphore so_sema; u32 so_seqid; /* protected by so_sema */ - unsigned int so_flags; /* protected by so_sema */ atomic_t so_count; - long so_generation; struct rpc_cred *so_cred; /* Associated cred */ struct list_head so_states; @@ -642,8 +637,8 @@ extern int nfs4_proc_setclientid_confirm extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *); extern int nfs4_proc_async_renew(struct nfs4_client *); extern int nfs4_proc_renew(struct nfs4_client *); -extern int _nfs4_do_close(struct inode *, struct nfs4_state *); -extern int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode); +extern int nfs4_do_close(struct inode *, struct nfs4_state *); +extern int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode); extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *); extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); @@ -659,6 +654,7 @@ extern void init_nfsv4_state(struct nfs_ extern void destroy_nfsv4_state(struct nfs_server *); extern struct nfs4_client *nfs4_get_client(struct in_addr *); extern void nfs4_put_client(struct nfs4_client *clp); +extern int nfs4_init_client(struct nfs4_client *clp); extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);