NFSv4: Allow recovery from network partitions Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 8 +++ fs/nfs/nfs4proc.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4state.c | 34 +++++++++++----- include/linux/nfs_fs.h | 2 4 files changed, 137 insertions(+), 11 deletions(-) Index: linux-2.6.11/fs/nfs/nfs4proc.c =================================================================== --- linux-2.6.11.orig/fs/nfs/nfs4proc.c +++ linux-2.6.11/fs/nfs/nfs4proc.c @@ -436,6 +436,100 @@ out: } /* + * OPEN_EXPIRED: + * reclaim state on the server after a network partition. + * Assumes caller holds the appropriate lock + */ +static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +{ + struct dentry *parent = dget_parent(dentry); + struct inode *dir = parent->d_inode; + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs_delegation *delegation = NFS_I(inode)->delegation; + struct nfs_fattr f_attr = { + .valid = 0, + }; + struct nfs_openargs o_arg = { + .fh = NFS_FH(dir), + .open_flags = state->state, + .name = &dentry->d_name, + .bitmask = server->attr_bitmask, + .claim = NFS4_OPEN_CLAIM_NULL, + }; + struct nfs_openres o_res = { + .f_attr = &f_attr, + .server = server, + }; + int status = 0; + + if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { + status = _nfs4_do_access(inode, sp->so_cred, state->state); + if (status < 0) + goto out; + memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid)); + set_bit(NFS_DELEGATED_STATE, &state->flags); + goto out; + } + status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); + if (status != 0) + goto out_nodeleg; + /* Check if files differ */ + if ((f_attr.mode & S_IFMT) != (inode->i_mode & S_IFMT)) + goto out_stale; + /* Has the file handle changed? */ + if (nfs_compare_fh(&o_res.fh, NFS_FH(inode)) != 0) { + /* Verify if the change attributes are the same */ + if (f_attr.change_attr != NFS_I(inode)->change_attr) + goto out_stale; + if (nfs_size_to_loff_t(f_attr.size) != inode->i_size) + goto out_stale; + /* Lets just pretend that this is the same file */ + nfs_copy_fh(NFS_FH(inode), &o_res.fh); + NFS_I(inode)->fileid = f_attr.fileid; + } + memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); + if (o_res.delegation_type != 0) { + if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) + nfs_inode_set_delegation(inode, sp->so_cred, &o_res); + else + nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); + } +out_nodeleg: + clear_bit(NFS_DELEGATED_STATE, &state->flags); +out: + dput(parent); + return status; +out_stale: + status = -ESTALE; + /* Invalidate the state owner so we don't ever use it again */ + nfs4_drop_state_owner(sp); + d_drop(dentry); + /* Should we be trying to close that stateid? */ + goto out_nodeleg; +} + +static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct nfs_inode *nfsi = NFS_I(state->inode); + struct nfs_open_context *ctx; + int status; + + spin_lock(&state->inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + if (ctx->state != state) + continue; + get_nfs_open_context(ctx); + spin_unlock(&state->inode->i_lock); + status = _nfs4_open_expired(sp, state, ctx->dentry); + put_nfs_open_context(ctx); + return status; + } + spin_unlock(&state->inode->i_lock); + return -ENOENT; +} + +/* * Returns an nfs4_state + an extra reference to the inode */ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res) @@ -2563,6 +2657,11 @@ static int nfs4_lock_reclaim(struct nfs4 return _nfs4_do_setlk(state, F_SETLK, request, 1); } +static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) +{ + return _nfs4_do_setlk(state, F_SETLK, request, 0); +} + static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct nfs4_client *clp = state->owner->so_client; @@ -2637,6 +2736,11 @@ struct nfs4_state_recovery_ops nfs4_rebo .recover_lock = nfs4_lock_reclaim, }; +struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = { + .recover_open = nfs4_open_expired, + .recover_lock = nfs4_lock_expired, +}; + struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, Index: linux-2.6.11/include/linux/nfs_fs.h =================================================================== --- linux-2.6.11.orig/include/linux/nfs_fs.h +++ linux-2.6.11/include/linux/nfs_fs.h @@ -692,6 +692,7 @@ extern struct inode *nfs4_atomic_open(st extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; +extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; /* nfs4renewd.c */ extern void nfs4_schedule_state_renewal(struct nfs4_client *); @@ -709,6 +710,7 @@ extern u32 nfs4_alloc_lockowner_id(struc extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); +extern void nfs4_drop_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, mode_t); Index: linux-2.6.11/fs/nfs/nfs4state.c =================================================================== --- linux-2.6.11.orig/fs/nfs/nfs4state.c +++ linux-2.6.11/fs/nfs/nfs4state.c @@ -280,8 +280,8 @@ nfs4_alloc_state_owner(void) return sp; } -static void -nfs4_unhash_state_owner(struct nfs4_state_owner *sp) +void +nfs4_drop_state_owner(struct nfs4_state_owner *sp) { struct nfs4_client *clp = sp->so_client; spin_lock(&clp->cl_lock); @@ -720,7 +720,7 @@ void nfs4_increment_seqid(int status, st sp->so_seqid++; /* If the server returns BAD_SEQID, unhash state_owner here */ if (status == -NFS4ERR_BAD_SEQID) - nfs4_unhash_state_owner(sp); + nfs4_drop_state_owner(sp); } static int reclaimer(void *); @@ -833,8 +833,7 @@ static int nfs4_reclaim_open_state(struc default: printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", __FUNCTION__, status); - case -NFS4ERR_EXPIRED: - case -NFS4ERR_NO_GRACE: + case -ENOENT: case -NFS4ERR_RECLAIM_BAD: case -NFS4ERR_RECLAIM_CONFLICT: /* @@ -846,6 +845,8 @@ static int nfs4_reclaim_open_state(struc /* Mark the file as being 'closed' */ state->state = 0; break; + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: case -NFS4ERR_STALE_CLIENTID: goto out_err; } @@ -877,21 +878,34 @@ static int reclaimer(void *ptr) goto out; restart_loop: status = nfs4_proc_renew(clp); - if (status == 0 || status == -NFS4ERR_CB_PATH_DOWN) - goto out; - ops = &nfs4_reboot_recovery_ops; + switch (status) { + case 0: + case -NFS4ERR_CB_PATH_DOWN: + goto out; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_LEASE_MOVED: + ops = &nfs4_reboot_recovery_ops; + break; + default: + ops = &nfs4_network_partition_recovery_ops; + }; status = __nfs4_init_client(clp); if (status) goto out_error; - /* Mark all delagations for reclaim */ + /* Mark all delegations for reclaim */ nfs_delegation_mark_reclaim(clp); /* Note: list is protected by exclusive lock on cl->cl_sem */ list_for_each_entry(sp, &clp->cl_state_owners, so_list) { status = nfs4_reclaim_open_state(ops, sp); if (status < 0) { + if (status == -NFS4ERR_NO_GRACE) { + ops = &nfs4_network_partition_recovery_ops; + status = nfs4_reclaim_open_state(ops, sp); + } if (status == -NFS4ERR_STALE_CLIENTID) goto restart_loop; - goto out_error; + if (status == -NFS4ERR_EXPIRED) + goto restart_loop; } } nfs_delegation_reap_unclaimed(clp); Index: linux-2.6.11/fs/nfs/inode.c =================================================================== --- linux-2.6.11.orig/fs/nfs/inode.c +++ linux-2.6.11/fs/nfs/inode.c @@ -863,6 +863,12 @@ struct nfs_open_context *get_nfs_open_co void put_nfs_open_context(struct nfs_open_context *ctx) { if (atomic_dec_and_test(&ctx->count)) { + if (!list_empty(&ctx->list)) { + struct inode *inode = ctx->dentry->d_inode; + spin_lock(&inode->i_lock); + list_del(&ctx->list); + spin_unlock(&inode->i_lock); + } if (ctx->state != NULL) nfs4_close_state(ctx->state, ctx->mode); if (ctx->cred != NULL) @@ -911,7 +917,7 @@ void nfs_file_clear_open_context(struct if (ctx) { filp->private_data = NULL; spin_lock(&inode->i_lock); - list_del(&ctx->list); + list_move_tail(&ctx->list, &NFS_I(inode)->open_files); spin_unlock(&inode->i_lock); put_nfs_open_context(ctx); }