NFS: Attempt to recover when migrating to a replicated filesystem.

 If the replica does not preserve filehandles and/or inode numbers, then
 we can try to walk the path in order to recover.

 This is definitely a case of walking on eggshells, though.

 [review: test NFS_ATTR_FATTR_V4 in fattr->valid with a bitwise '&' rather
  than a logical '&&', so that non-NFSv4 attributes are compared by ctime;
  document the arguments and return values of the two new helpers]

 Signed-off-by: Trond Myklebust
---
 fs/nfs/inode.c            |  122 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4proc.c         |   22 +++-----
 include/linux/nfs_fs.h    |    2 
 include/linux/nfs_fs_sb.h |    1 
 4 files changed, 133 insertions(+), 14 deletions(-)

Index: linux-2.6.12-rc1/fs/nfs/inode.c
===================================================================
--- linux-2.6.12-rc1.orig/fs/nfs/inode.c
+++ linux-2.6.12-rc1/fs/nfs/inode.c
@@ -1566,6 +1566,128 @@ static int nfs_update_inode(struct inode
 }
 
 /*
+ * nfs_try_migrate_filehandle - Check if we can migrate the inode filehandle
+ * @inode - pointer to inode
+ * @fh - the filehandle resulting from lookup()
+ * @fattr - attributes associated with the new filehandle
+ * @generation - server generation to record in the inode on success
+ *
+ * Do our very best to update existing inodes when the user wants to migrate
+ * this filesystem to a replica server.
+ *
+ * Note that here be HUGE dragons, with endless possibilities for causing
+ * trouble...
+ *
+ * Returns 0 if the inode is still usable (its filehandle and fileid are
+ * updated if needed), or -EIO if the type, change attr/ctime or size differ.
+ */
+int nfs_try_migrate_filehandle(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr, uint32_t generation)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	/* Argh! The basic file type has changed */
+	if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+		goto out_bad;
+	/* Fileid + filehandle are the same. Good! */
+	if (nfs_compare_fh(&nfsi->fh, fh) == 0 && nfsi->fileid == fattr->fileid)
+		goto out_good;
+	if (fattr->valid & NFS_ATTR_FATTR_V4) {
+		/* Do the NFSv4 change attributes match our cached value? */
+		if (nfsi->change_attr != fattr->change_attr)
+			goto out_bad;
+	} else {
+		/* Does the ctime match? */
+		if (!timespec_equal(&fattr->ctime, &inode->i_ctime))
+			goto out_bad;
+	}
+	/* Does the file size match? */
+	if (nfs_size_to_loff_t(fattr->size) != inode->i_size)
+		goto out_bad;
+	/* FIXME: Here lie the biggest dragons:
+	 *        Try considering all possible races w.r.t. iget5_locked()
+	 */
+	nfs_copy_fh(&nfsi->fh, fh);
+	if (nfsi->fileid != fattr->fileid) {
+		/* The very concept of migrating to a new hash bucket
+		 * is so full of holes and races that it defies belief!
+		 */
+		remove_inode_hash(inode);
+		nfsi->fileid = fattr->fileid;
+		inode->i_ino = nfs_fattr_to_ino_t(fattr);
+		insert_inode_hash(inode);
+	}
+out_good:
+	inode->i_generation = generation;
+	return 0;
+out_bad:
+	return -EIO;
+}
+
+/*
+ * nfs_try_migrate_inode - Update an inode's filehandle after migration
+ * @inode - pointer to inode to migrate
+ * @dentry - pointer to dentry, or NULL to use an alias of @inode
+ *
+ * Returns 0 on success, -ENOENT if @dentry is NULL and no alias exists,
+ * or the error from the lookup or from nfs_try_migrate_filehandle().
+ */
+int nfs_try_migrate_inode(struct inode *inode, struct dentry *dentry)
+{
+	struct nfs_fh fh;
+	struct nfs_fattr fattr;
+	struct dentry *next, *next_parent;
+	uint32_t generation;
+	int status;
+
+	if (dentry == NULL) {
+		status = -ENOENT;
+		dentry = d_find_alias(inode);
+		if (dentry == NULL)
+			goto out;
+	} else
+		dget(dentry);
+repeat:
+	/* Has this inode already been revalidated? */
+	status = 0;
+	generation = NFS_SERVER(inode)->generation;
+	if ((long)generation - (long)inode->i_generation <= 0)
+		goto out;
+	/* No. Search for a previously revalidated path element */
+	next = dget(dentry);
+	next_parent = dget_parent(dentry);
+	while((long)generation - (long)next_parent->d_inode->i_generation > 0) {
+		BUG_ON(IS_ROOT(next_parent));
+		dput(next);
+		next = next_parent;
+		next_parent = dget_parent(next);
+	}
+	status = NFS_PROTO(inode)->lookup(next_parent->d_inode, &next->d_name,
+			&fh, &fattr);
+	if (status == 0)
+		status = nfs_try_migrate_filehandle(next->d_inode, &fh, &fattr, generation);
+	switch (status) {
+		case -ESTALE:
+			if (IS_ROOT(next_parent))
+				break;
+			/* Fall through */
+		case 0:
+			if (dentry->d_inode == inode)
+				break;
+			dput(next_parent);
+			dput(next);
+			goto repeat;
+		default:
+			d_drop(next);
+	}
+	dput(next_parent);
+	dput(next);
+out:
+	dput(dentry);
+	dprintk("%s: returned error %d\n", __FUNCTION__, status);
+	return status;
+}
+
+/*
  * File system information
  */
 
Index: linux-2.6.12-rc1/fs/nfs/nfs4proc.c
===================================================================
--- linux-2.6.12-rc1.orig/fs/nfs/nfs4proc.c
+++ linux-2.6.12-rc1/fs/nfs/nfs4proc.c
@@ -461,6 +461,7 @@ static int _nfs4_open_expired(struct nfs
 		.f_attr = &f_attr,
 		.server = server,
 	};
+	uint32_t generation;
 	int status = 0;
 
 	if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
@@ -471,23 +472,17 @@ static int _nfs4_open_expired(struct nfs
 		set_bit(NFS_DELEGATED_STATE, &state->flags);
 		goto out;
 	}
+	/* If we are in a failover situation, recover path first */
+	status = nfs_try_migrate_inode(dir, parent);
+	if (status != 0)
+		goto out_nodeleg;
+	generation = server->generation;
 	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
 	if (status != 0)
 		goto out_nodeleg;
-	/* Check if files differ */
-	if ((f_attr.mode & S_IFMT) != (inode->i_mode & S_IFMT))
+	status = nfs_try_migrate_filehandle(inode, &o_res.fh, o_res.f_attr, generation);
+	if (status != 0)
 		goto out_stale;
-	/* Has the file handle changed? */
-	if (nfs_compare_fh(&o_res.fh, NFS_FH(inode)) != 0) {
-		/* Verify if the change attributes are the same */
-		if (f_attr.change_attr != NFS_I(inode)->change_attr)
-			goto out_stale;
-		if (nfs_size_to_loff_t(f_attr.size) != inode->i_size)
-			goto out_stale;
-		/* Lets just pretend that this is the same file */
-		nfs_copy_fh(NFS_FH(inode), &o_res.fh);
-		NFS_I(inode)->fileid = f_attr.fileid;
-	}
 	memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
 	if (o_res.delegation_type != 0) {
 		if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM))
@@ -501,7 +496,6 @@ out:
 	dput(parent);
 	return status;
 out_stale:
-	status = -ESTALE;
 	/* Invalidate the state owner so we don't ever use it again */
 	nfs4_drop_state_owner(sp);
 	d_drop(dentry);
Index: linux-2.6.12-rc1/include/linux/nfs_fs.h
===================================================================
--- linux-2.6.12-rc1.orig/include/linux/nfs_fs.h
+++ linux-2.6.12-rc1/include/linux/nfs_fs.h
@@ -306,6 +306,8 @@ extern struct vfsmount *nfs_do_submount(
 					const struct dentry *dentry,
 					struct nfs_fh *fh,
 					struct nfs_fattr *fattr);
+extern int nfs_try_migrate_inode(struct inode *dir, struct dentry *parent);
+extern int nfs_try_migrate_filehandle(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr, uint32_t generation);
 
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern u32 root_nfs_parse_addr(char *name); /*__init*/
Index: linux-2.6.12-rc1/include/linux/nfs_fs_sb.h
===================================================================
--- linux-2.6.12-rc1.orig/include/linux/nfs_fs_sb.h
+++ linux-2.6.12-rc1/include/linux/nfs_fs_sb.h
@@ -36,6 +36,7 @@ struct nfs_server {
 	struct nfs_fh		fh;
 	struct sockaddr_in	addr;
 	struct nfs_fsid		fsid;
+	uint32_t		generation;
 #ifdef CONFIG_NFS_V4
 	/* Our own IP address, as a null-terminated string.
 	 * This is used to generate the clientid, and the callback address.