VFS: Retry pathname resolution after encountering ESTALE Add a mechanism for the VFS layer to retry pathname resolution if a file system returns ESTALE at any point during the resolution process. Pathname resolution is retried once from the first component, using all real lookup requests. This provides effective recovery for most cases where files or directories have been replaced by other remote file system clients. It also provides a foundation to build a mechanism by which file system clients can fail over transparently to a replicated server. Test-plan: Combinations of rsync and "ls -l" on multiple clients. No stale file handles should be reported after directory trees are replaced. Standard performance tests; little or no loss of performance is expected. Created: Fri, 11 Feb 2005 16:46:19 -0500 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/namei.c | 33 ++++++++++++++++++++++++++++++++- fs/nfs/dir.c | 23 +++++++++++++++-------- include/linux/namei.h | 2 ++ 3 files changed, 49 insertions(+), 9 deletions(-) Index: linux-2.6.11-rc3/fs/namei.c =================================================================== --- linux-2.6.11-rc3.orig/fs/namei.c +++ linux-2.6.11-rc3/fs/namei.c @@ -681,7 +681,7 @@ fail: * * We expect 'base' to be positive and a directory. */ -int fastcall link_path_walk(const char * name, struct nameidata *nd) +static fastcall int __link_path_walk(const char * name, struct nameidata *nd) { struct path next; struct inode *inode; @@ -881,6 +881,37 @@ return_err: return err; } +/* + * Wrapper to retry pathname resolution whenever the underlying + * file system returns an ESTALE. + * + * Retry the whole path once, forcing real lookup requests + * instead of relying on the dcache. 
+ */ +int fastcall link_path_walk(const char *name, struct nameidata *nd) +{ + struct nameidata save = *nd; + int result; + + /* make sure the stuff we saved doesn't go away */ + dget(save.dentry); + mntget(save.mnt); + + result = __link_path_walk(name, nd); + if (result == -ESTALE) { + *nd = save; + dget(nd->dentry); + mntget(nd->mnt); + nd->flags |= LOOKUP_REVAL; + result = __link_path_walk(name, nd); + } + + dput(save.dentry); + mntput(save.mnt); + + return result; +} + int fastcall path_walk(const char * name, struct nameidata *nd) { current->total_link_count = 0; Index: linux-2.6.11-rc3/fs/nfs/dir.c =================================================================== --- linux-2.6.11-rc3.orig/fs/nfs/dir.c +++ linux-2.6.11-rc3/fs/nfs/dir.c @@ -529,13 +529,24 @@ static inline void nfs_renew_times(struc } static inline -int nfs_lookup_verify_inode(struct inode *inode, int isopen) +int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) { struct nfs_server *server = NFS_SERVER(inode); - if (isopen && !(server->flags & NFS_MOUNT_NOCTO)) - return __nfs_revalidate_inode(server, inode); + if (nd != NULL) { + int ndflags = nd->flags; + /* VFS wants an on-the-wire revalidation */ + if (ndflags & LOOKUP_REVAL) + goto out_force; + /* This is an open(2) */ + if ((ndflags & LOOKUP_OPEN) && + !(ndflags & LOOKUP_CONTINUE) && + !(server->flags & NFS_MOUNT_NOCTO)) + goto out_force; + } return nfs_revalidate_inode(server, inode); +out_force: + return __nfs_revalidate_inode(server, inode); } /* @@ -579,16 +590,12 @@ static int nfs_lookup_revalidate(struct struct nfs_fh fhandle; struct nfs_fattr fattr; unsigned long verifier; - int isopen = 0; parent = dget_parent(dentry); lock_kernel(); dir = parent->d_inode; inode = dentry->d_inode; - if (nd && !(nd->flags & LOOKUP_CONTINUE) && (nd->flags & LOOKUP_OPEN)) - isopen = 1; - if (!inode) { if (nfs_neg_need_reval(dir, dentry, nd)) goto out_bad; @@ -606,7 +613,7 @@ static int nfs_lookup_revalidate(struct /* Force a full 
look up iff the parent directory has changed */ if (nfs_check_verifier(dir, dentry)) { - if (nfs_lookup_verify_inode(inode, isopen)) + if (nfs_lookup_verify_inode(inode, nd)) goto out_zap_parent; goto out_valid; } Index: linux-2.6.11-rc3/include/linux/namei.h =================================================================== --- linux-2.6.11-rc3.orig/include/linux/namei.h +++ linux-2.6.11-rc3/include/linux/namei.h @@ -39,12 +39,14 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA * - ending slashes ok even for nonexistent files * - internal "there are more path compnents" flag * - locked when lookup done with dcache_lock held + * - dentry cache is untrusted; force a real lookup */ #define LOOKUP_FOLLOW 1 #define LOOKUP_DIRECTORY 2 #define LOOKUP_CONTINUE 4 #define LOOKUP_PARENT 16 #define LOOKUP_NOALT 32 +#define LOOKUP_REVAL 64 /* * Intent data */