All of the above --- Documentation/filesystems/caching/backend-api.txt | 357 +++++ Documentation/filesystems/caching/cachefiles.txt | 281 ++++ Documentation/filesystems/caching/fscache.txt | 151 ++ Documentation/filesystems/caching/netfs-api.txt | 752 +++++++++++ Documentation/sysctl/fs.txt | 6 fs/Kconfig | 49 + fs/Makefile | 2 fs/afs/cache.h | 27 fs/afs/cell.c | 109 +- fs/afs/cell.h | 16 fs/afs/cmservice.c | 2 fs/afs/dir.c | 15 fs/afs/file.c | 246 ++-- fs/afs/fsclient.c | 4 fs/afs/inode.c | 45 - fs/afs/internal.h | 25 fs/afs/main.c | 24 fs/afs/mntpt.c | 12 fs/afs/proc.c | 1 fs/afs/server.c | 3 fs/afs/vlocation.c | 179 ++- fs/afs/vnode.c | 248 +++- fs/afs/vnode.h | 10 fs/afs/volume.c | 78 - fs/afs/volume.h | 28 fs/autofs4/autofs_i.h | 3 fs/autofs4/init.c | 2 fs/autofs4/inode.c | 22 fs/autofs4/waitq.c | 1 fs/buffer.c | 2 fs/cachefiles/Makefile | 18 fs/cachefiles/cf-bind.c | 283 ++++ fs/cachefiles/cf-interface.c | 1315 +++++++++++++++++++ fs/cachefiles/cf-key.c | 160 ++ fs/cachefiles/cf-main.c | 131 ++ fs/cachefiles/cf-namei.c | 837 ++++++++++++ fs/cachefiles/cf-proc.c | 510 +++++++ fs/cachefiles/cf-sysctl.c | 69 + fs/cachefiles/cf-xattr.c | 299 ++++ fs/cachefiles/internal.h | 308 ++++ fs/dcache.c | 297 ++++ fs/ext2/dir.c | 6 fs/ext3/inode.c | 10 fs/fcntl.c | 2 fs/file_table.c | 49 + fs/freevxfs/vxfs_subr.c | 2 fs/fscache/Makefile | 11 fs/fscache/cookie.c | 1063 ++++++++++++++++ fs/fscache/fscache-int.h | 93 + fs/fscache/fsdef.c | 113 ++ fs/fscache/main.c | 105 ++ fs/fscache/page.c | 548 ++++++++ fs/nfs/Makefile | 7 fs/nfs/callback.c | 31 fs/nfs/callback.h | 7 fs/nfs/callback_proc.c | 13 fs/nfs/client.c | 1439 +++++++++++++++++++++ fs/nfs/delegation.c | 35 - fs/nfs/delegation.h | 10 fs/nfs/dir.c | 18 fs/nfs/file.c | 35 - fs/nfs/fscache.c | 349 +++++ fs/nfs/fscache.h | 466 +++++++ fs/nfs/getroot.c | 306 ++++ fs/nfs/idmap.c | 39 - fs/nfs/inode.c | 35 - fs/nfs/internal.h | 134 +- fs/nfs/namespace.c | 33 fs/nfs/nfs3proc.c | 8 fs/nfs/nfs4_fs.h | 78 - fs/nfs/nfs4namespace.c | 118 +- fs/nfs/nfs4proc.c | 165 +- fs/nfs/nfs4renewd.c | 19 fs/nfs/nfs4state.c | 174 --- fs/nfs/nfs4xdr.c | 42 - fs/nfs/pagelist.c | 3 fs/nfs/proc.c | 4 fs/nfs/read.c | 32 fs/nfs/super.c | 1400 +++++++------------- fs/nfs/sysctl.c | 43 + fs/nfs/write.c | 13 fs/open.c | 20 fs/reiserfs/inode.c | 10 fs/super.c | 12 fs/ufs/dir.c | 6 include/linux/dcache.h | 2 include/linux/file.h | 1 include/linux/fs.h | 10 include/linux/fscache-cache.h | 243 ++++ include/linux/fscache.h | 496 +++++++ include/linux/nfs4_mount.h | 1 include/linux/nfs_fs.h | 8 include/linux/nfs_fs_sb.h | 94 + include/linux/nfs_idmap.h | 14 include/linux/nfs_mount.h | 1 include/linux/nfs_xdr.h | 4 include/linux/page-flags.h | 15 include/linux/pagemap.h | 17 include/linux/sysctl.h | 1 kernel/auditsc.c | 2 kernel/sysctl.c | 11 mm/filemap.c | 120 ++ mm/migrate.c | 4 mm/page_alloc.c | 2 mm/readahead.c | 25 105 files changed, 13153 insertions(+), 1951 deletions(-) diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt new file mode 100644 index 0000000..1dd601b --- /dev/null +++ b/Documentation/filesystems/caching/backend-api.txt @@ -0,0 +1,357 @@ + ========================== + FS-CACHE CACHE BACKEND API + ========================== + +The FS-Cache system provides an API by which actual caches can be supplied to +FS-Cache for it to then serve out to network filesystems and other interested +parties. + +This API is declared in . 
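Before the details, here is a minimal, hypothetical sketch of the sequence a backend would follow, using only the calls documented in the sections below. The cache and ops names are invented, most of the mandatory operations are elided, and the header name is taken from this patch's diffstat; this is an orientation aid, not code from the patch.

	#include <linux/fscache-cache.h>

	static struct fscache_cache_ops mycache_ops = {
		.name = "mycache",
		/* lookup_object, grab_object, put_object, ... as described
		 * under "CACHE OPERATIONS" below */
	};

	static struct fscache_cache mycache;

	/* called from the backend's fill_super(); fsdef is the object that
	 * backs the FS-Cache master index in this cache */
	static int mycache_register(struct fscache_object *fsdef)
	{
		int ret;

		fscache_init_cache(&mycache, &mycache_ops, "mycache-%s", "dev0");

		ret = fscache_add_cache(&mycache, fsdef, NULL);
		if (ret < 0)
			return ret;	/* -ENOMEM or -EEXIST */
		return 0;
	}

The cache would later be withdrawn again with fscache_withdraw_cache(&mycache), as described further on.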
+ + +==================================== +INITIALISING AND REGISTERING A CACHE +==================================== + +To start off, a cache definition must be initialised and registered for each +cache the backend wants to make available. For instance, CacheFS does this in +the fill_super() operation on mounting. + +The cache definition (struct fscache_cache) should be initialised by calling: + + void fscache_init_cache(struct fscache_cache *cache, + struct fscache_cache_ops *ops, + const char *idfmt, + ...) + +Where: + + (*) "cache" is a pointer to the cache definition; + + (*) "ops" is a pointer to the table of operations that the backend supports on + this cache; + + (*) and a format and printf-style arguments for constructing a label for the + cache. + + +The cache should then be registered with FS-Cache by passing a pointer to the +previously initialised cache definition to: + + int fscache_add_cache(struct fscache_cache *cache, + struct fscache_object *fsdef, + const char *tagname); + +Two extra arguments should also be supplied: + + (*) "fsdef" which should point to the object representation for the FS-Cache + master index in this cache. Netfs primary index entries will be created + here. + + (*) "tagname" which, if given, should be a text string naming this cache. If + this is NULL, the identifier will be used instead. For CacheFS, the + identifier is set to name the underlying block device and the tag can be + supplied by mount. + +This function may return -ENOMEM if it ran out of memory or -EEXIST if the tag +is already in use. 0 will be returned on success. + + +===================== +UNREGISTERING A CACHE +===================== + +A cache can be withdrawn from the system by calling this function with a +pointer to the cache definition: + + void fscache_withdraw_cache(struct fscache_cache *cache) + +In CacheFS's case, this is called by put_super(). + + +================== +FS-CACHE UTILITIES +================== + +FS-Cache provides some utilities that a cache backend may make use of: + + (*) Find the parent of an object: + + struct fscache_object * + fscache_find_parent_object(struct fscache_object *object) + + This allows a backend to find the logical parent of an index or data file + in the cache hierarchy. + + (*) Note occurrence of an I/O error in a cache: + + void fscache_io_error(struct fscache_cache *cache) + + This tells FS-Cache that an I/O error occurred in the cache. After this + has been called, only resource dissociation operations (object and page + release) will be passed from the netfs to the cache backend for the + specified cache. + + This does not actually withdraw the cache. That must be done separately. + + (*) Get an extra reference to a read or write context: + + void *fscache_get_context(struct fscache_cookie *cookie, void *context) + + and release a reference: + + void *fscache_put_context(struct fscache_cookie *cookie, void *context) + + These should be used to maintain the presence of the read or write context + passed to the cache read/write functions. This context must then be + passed to the I/O completion function. + + +======================== +RELEVANT DATA STRUCTURES +======================== + + (*) Index/Data file FS-Cache representation cookie: + + struct fscache_cookie { + struct fscache_object_def *def; + struct fscache_netfs *netfs; + void *netfs_data; + ... + }; + + The fields that might be of use to the backend describe the object + definition, the netfs definition and the netfs's data for this cookie. 
+ The object definition contain functions supplied by the netfs for loading + and matching index entries; these are required to provide some of the + cache operations. + + (*) In-cache object representation: + + struct fscache_object { + struct fscache_cache *cache; + struct fscache_cookie *cookie; + unsigned long flags; + #define FSCACHE_OBJECT_RECYCLING 1 + ... + }; + + Structures of this type should be allocated by the cache backend and + passed to FS-Cache when requested by the appropriate cache operation. In + the case of CacheFS, they're embedded in CacheFS's internal object + structures. + + Each object contains a pointer to the cookie that represents the object it + is backing. It also contains a flag that indicates whether the object is + being retired when put_object() is called. This should be initialised by + calling fscache_object_init(object). + + +================ +CACHE OPERATIONS +================ + +The cache backend provides FS-Cache with a table of operations that can be +performed on the denizens of the cache. These are held in a structure of type: + + struct fscache_cache_ops + + (*) Name of cache provider [mandatory]: + + const char *name + + This isn't strictly an operation, but should be pointed at a string naming + the backend. + + (*) Object lookup [mandatory]: + + struct fscache_object *(*lookup_object)(struct fscache_cache *cache, + struct fscache_object *parent, + struct fscache_cookie *cookie) + + This method is used to look up an object in the specified cache, given a + pointer to the parent object and the cookie to which the object will be + attached. This should instantiate that object in the cache if it can, or + return -ENOBUFS or -ENOMEM if it can't. + + (*) Increment object refcount [mandatory]: + + struct fscache_object *(*grab_object)(struct fscache_object *object) + + This method is called to increment the reference count on an object. It + may fail (for instance if the cache is being withdrawn) by returning NULL. + It should return the object pointer if successful. + + (*) Lock/Unlock object [mandatory]: + + void (*lock_object)(struct fscache_object *object) + void (*unlock_object)(struct fscache_object *object) + + These methods are used to exclusively lock an object. It must be possible + to schedule with the lock held, so a spinlock isn't sufficient. + + (*) Pin/Unpin object [optional]: + + int (*pin_object)(struct fscache_object *object) + void (*unpin_object)(struct fscache_object *object) + + These methods are used to pin an object into the cache. Once pinned an + object cannot be reclaimed to make space. Return -ENOSPC if there's not + enough space in the cache to permit this. + + (*) Update object [mandatory]: + + int (*update_object)(struct fscache_object *object) + + This is called to update the index entry for the specified object. The + new information should be in object->cookie->netfs_data. This can be + obtained by calling object->cookie->def->get_aux()/get_attr(). + + (*) Release object reference [mandatory]: + + void (*put_object)(struct fscache_object *object) + + This method is used to discard a reference to an object. The object may + be destroyed when all the references held by FS-Cache are released. + + (*) Synchronise a cache [mandatory]: + + void (*sync)(struct fscache_cache *cache) + + This is called to ask the backend to synchronise a cache with its backing + device. 
+ + (*) Dissociate a cache [mandatory]: + + void (*dissociate_pages)(struct fscache_cache *cache) + + This is called to ask a cache to perform any page dissociations as part of + cache withdrawal. + + (*) Set the data size on a cache file [mandatory]: + + int (*set_i_size)(struct fscache_object *object, loff_t i_size); + + This is called to indicate to the cache the maximum size a file may reach. + The cache may use this to reserve space on the cache. It may also return + -ENOBUFS to indicate that insufficient space is available to expand the + metadata used to track the data. It should return 0 if successful or + -ENOMEM or -EIO on error. + + (*) Reserve cache space for an object's data [optional]: + + int (*reserve_space)(struct fscache_object *object, loff_t size); + + This is called to request that cache space be reserved to hold the data + for an object and the metadata used to track it. Zero size should be + taken as request to cancel a reservation. + + This should return 0 if successful, -ENOSPC if there isn't enough space + available, or -ENOMEM or -EIO on other errors. + + The reservation may exceed the size of the object, thus permitting future + expansion. If the amount of space consumed by an object would exceed the + reservation, it's permitted to refuse requests to allocate pages, but not + required. An object may be pruned down to its reservation size if larger + than that already. + + (*) Request page be read from cache [mandatory]: + + int (*read_or_alloc_page)(struct fscache_object *object, + struct page *page, + fscache_rw_complete_t end_io_func, + void *end_io_data, + gfp_t gfp) + + This is called to attempt to read a netfs page from the cache, or to + reserve a backing block if not. FS-Cache will have done as much checking + as it can before calling, but most of the work belongs to the backend. + + If there's no page in the cache, then -ENODATA should be returned if the + backend managed to reserve a backing block; -ENOBUFS, -ENOMEM or -EIO if + it didn't. + + If there is a page in the cache, then a read operation should be queued + and 0 returned. When the read finishes, end_io_func() should be called + with the following arguments: + + (*end_io_func)(object->cookie->netfs_data, + page, + end_io_data, + error); + + The mark_pages_cached() cookie operation should be called for the page if + any cache metadata is retained. This will indicate to the netfs that the + page needs explicit uncaching. This operation takes a pagevec, thus + allowing several pages to be marked at once. + + (*) Request pages be read from cache [mandatory]: + + int (*read_or_alloc_pages)(struct fscache_object *object, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t end_io_func, + void *end_io_data, + gfp_t gfp) + + This is like the previous operation, except it will be handed a list of + pages instead of one page. Any pages on which a read operation is started + must be added to the page cache for the specified mapping and also to the + LRU. Such pages must also be removed from the pages list and nr_pages + decremented per page. + + If there was an error such as -ENOMEM, then that should be returned; else + if one or more pages couldn't be read or allocated, then -ENOBUFS should + be returned; else if one or more pages couldn't be read, then -ENODATA + should be returned. If all the pages are dispatched then 0 should be + returned. 
+ + (*) Request page be allocated in the cache [mandatory]: + + int (*allocate_page)(struct fscache_object *object, + struct page *page, + gfp_t gfp) + + This is like read_or_alloc_page(), except that it shouldn't read from the + cache, even if there's data there that could be retrieved. It should, + however, set up any internal metadata required such that write_page() can + write to the cache. + + If there's no backing block available, then -ENOBUFS should be returned + (or -ENOMEM or -EIO if there were other problems). If a block is + successfully allocated, then the netfs page should be marked and 0 + returned. + + (*) Request page be written to cache [mandatory]: + + int (*write_page)(struct fscache_object *object, + struct page *page, + fscache_rw_complete_t end_io_func, + void *end_io_data, + gfp_t gfp) + + This is called to write from a page on which there was a previously + successful read_or_alloc_page() call. FS-Cache filters out pages that + don't have mappings. + + If there's no backing block available, then -ENOBUFS should be returned + (or -ENOMEM or -EIO if there were other problems). + + If the write operation could be queued, then 0 should be returned. When + the write completes, end_io_func() should be called with the following + arguments: + + (*end_io_func)(object->cookie->netfs_data, + page, + end_io_data, + error); + + (*) Discard retained per-page metadata [mandatory]: + + void (*uncache_pages)(struct fscache_object *object, + struct pagevec *pagevec) + + This is called when one or more netfs pages are being evicted from the + pagecache. The cache backend should tear down any internal representation + or tracking it maintains. diff --git a/Documentation/filesystems/caching/cachefiles.txt b/Documentation/filesystems/caching/cachefiles.txt new file mode 100644 index 0000000..37b6385 --- /dev/null +++ b/Documentation/filesystems/caching/cachefiles.txt @@ -0,0 +1,281 @@ + =============================================== + CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM + =============================================== + +Contents: + + (*) Overview. + + (*) Requirements. + + (*) Configuration. + + (*) Starting the cache. + + (*) Things to avoid. + + +======== +OVERVIEW +======== + +CacheFiles is a caching backend that's meant to use as a cache a directory on +an already mounted filesystem of a local type (such as Ext3). + +CacheFiles uses a userspace daemon to do some of the cache management - such as +reaping stale nodes and culling. This is called cachefilesd and lives in +/sbin. + +The filesystem and data integrity of the cache are only as good as those of the +filesystem providing the backing services. Note that CacheFiles does not +attempt to journal anything since the journalling interfaces of the various +filesystems are very specific in nature. + +CacheFiles creates a proc-file - "/proc/fs/cachefiles" - that is used for +communication with the daemon. Only one thing may have this open at once, and +whilst it is open, a cache is at least partially in existence. The daemon +opens this and sends commands down it to control the cache. + +CacheFiles is currently limited to a single cache. + +CacheFiles attempts to maintain at least a certain percentage of free space on +the filesystem, shrinking the cache by culling the objects it contains to make +space if necessary - see the "Cache Culling" section. 
This means it can be +placed on the same medium as a live set of data, and will expand to make use of +spare space and automatically contract when the set of data requires more +space. + + +============ +REQUIREMENTS +============ + +The use of CacheFiles and its daemon requires the following features to be +available in the system and in the cache filesystem: + + - dnotify. + + - extended attributes (xattrs). + + - openat() and friends. + + - bmap() support on files in the filesystem (FIBMAP ioctl). + + - The use of bmap() to detect a partial page at the end of the file. + +It is strongly recommended that the "dir_index" option is enabled on Ext3 +filesystems being used as a cache. + + +============= +CONFIGURATION +============= + +The cache is configured by a script in /etc/cachefilesd.conf. These commands +set up cache ready for use. The following script commands are available: + + (*) brun % + (*) bcull % + (*) bstop % + + Configure the culling limits. Optional. See the section on culling + The defaults are 7%, 5% and 1% respectively. + + (*) dir + + Specify the directory containing the root of the cache. Mandatory. + + (*) tag + + Specify a tag to FS-Cache to use in distinguishing multiple caches. + Optional. The default is "CacheFiles". + + (*) debug + + Specify a numeric bitmask to control debugging in the kernel module. + Optional. The default is zero (all off). The following values can be + OR'd into the mask to collect various information: + + 1 Turn on trace of function entry (_enter() macros) + 2 Turn on trace of function exit (_leave() macros) + 4 Turn on trace of internal debug points (_debug()) + + This mask can also be set through /proc/sys/fs/cachefiles/debug. + + +================== +STARTING THE CACHE +================== + +The cache is started by running the daemon. The daemon opens the cache proc +file, configures the cache and tells it to begin caching. At that point the +cache binds to fscache and the cache becomes live. + +The daemon is run as follows: + + /sbin/cachefilesd [-d]* [-s] [-n] [-f ] + +The flags are: + + (*) -d + + Increase the debugging level. This can be specified multiple times and + is cumulative with itself. + + (*) -s + + Send messages to stderr instead of syslog. + + (*) -n + + Don't daemonise and go into background. + + (*) -f + + Use an alternative configuration file rather than the default one. + + +=============== +THINGS TO AVOID +=============== + +Do not mount other things within the cache as this will cause problems. The +kernel module contains its own very cut-down path walking facility that ignores +mountpoints, but the daemon can't avoid them. + +Do not create, rename or unlink files and directories in the cache whilst the +cache is active, as this may cause the state to become uncertain. + +Renaming files in the cache might make objects appear to be other objects (the +filename is part of the lookup key). + +Do not change or remove the extended attributes attached to cache files by the +cache as this will cause the cache state management to get confused. + +Do not create files or directories in the cache, lest the cache get confused or +serve incorrect data. + +Do not chmod files in the cache. The module creates things with minimal +permissions to prevent random users being able to access them directly. + + +============= +CACHE CULLING +============= + +The cache may need culling occasionally to make space. This involves +discarding objects from the cache that have been used less recently than +anything else. 
Culling is based on the access time of data objects. Empty +directories are culled if not in use. + +Cache culling is done on the basis of the percentage of blocks available in the +underlying filesystem. There are three "limits": + + (*) brun + + If the amount of available space in the cache rises above this limit, then + culling is turned off. + + (*) bcull + + If the amount of available space in the cache falls below this limit, then + culling is started. + + (*) bstop + + If the amount of available space in the cache falls below this limit, then + no further allocation of disk space is permitted until culling has raised + the amount above this limit again. + +These must be configured thusly: + + 0 <= bstop < bcull < brun < 100 + +Note that these are percentages of available space, and do _not_ appear as 100 +minus the percentage displayed by the "df" program. + +The userspace daemon scans the cache to build up a table of cullable objects. +These are then culled in least recently used order. A new scan of the cache is +started as soon as space is made in the table. Objects will be skipped if +their atimes have changed or if the kernel module says it is still using them. + + +=============== +CACHE STRUCTURE +=============== + +The CacheFiles module will create two directories in the directory it was +given: + + (*) cache/ + + (*) graveyard/ + +The active cache objects all reside in the first directory. The CacheFiles +kernel module moves any retired or culled objects that it can't simply unlink +to the graveyard from which the daemon will actually delete them. + +The daemon uses dnotify to monitor the graveyard directory, and will delete +anything that appears therein. + + +The module represents index objects as directories with the filename "I..." or +"J...". Note that the "cache/" directory is itself a special index. + +Data objects are represented as files if they have no children, or directories +if they do. Their filenames all begin "D..." or "E...". If represented as a +directory, data objects will have a file in the directory called "data" that +actually holds the data. + +Special objects are similar to data objects, except their filenames begin +"S..." or "T...". + + +If an object has children, then it will be represented as a directory. +Immediately in the representative directory are a collection of directories +named for hash values of the child object keys with an '@' prepended. Into +this directory, if possible, will be placed the representations of the child +objects: + + INDEX INDEX INDEX DATA FILES + ========= ========== ================================= ================ + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400 + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry + + +If the key is so long that it exceeds NAME_MAX with the decorations added on to +it, then it will be cut into pieces, the first few of which will be used to +make a nest of directories, and the last one of which will be the objects +inside the last directory. The names of the intermediate directories will have +'+' prepended: + + J1223/@23/+xy...z/+kl...m/Epqr + + +Note that keys are raw data, and not only may they exceed NAME_MAX in size, +they may also contain things like '/' and NUL characters, and so they may not +be suitable for turning directly into a filename. 
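The next paragraph describes how CacheFiles deals with this; roughly speaking, it must decide whether a raw key can be used directly as a filename component or must be encoded first. A hypothetical sketch of that check follows (the function name is invented and the real logic in fs/cachefiles/cf-key.c differs in detail):

	#include <linux/types.h>

	/* Illustrative only: a raw key byte string can be used directly as a
	 * filename component only if every byte is a graphic ASCII character
	 * and none of them is '/'; otherwise it must be encoded. */
	static bool cachefiles_key_is_printable(const u8 *key, size_t len)
	{
		size_t i;

		for (i = 0; i < len; i++)
			if (key[i] < 0x21 || key[i] > 0x7e || key[i] == '/')
				return false;
		return true;
	}

Whether a key was stored in printable or encoded form is what the paired filename prefixes in the table below distinguish.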
+ +To handle this, CacheFiles will use a suitably printable filename directly and +"base-64" encode ones that aren't directly suitable. The two versions of +object filenames indicate the encoding: + + OBJECT TYPE PRINTABLE ENCODED + =============== =============== =============== + Index "I..." "J..." + Data "D..." "E..." + Special "S..." "T..." + +Intermediate directories are always "@" or "+" as appropriate. + + +Each object in the cache has an extended attribute label that holds the object +type ID (required to distinguish special objects) and the auxiliary data from +the netfs. The latter is used to detect stale objects in the cache and update +or retire them. + + +Note that CacheFiles will erase from the cache any file it doesn't recognise or +any file of an incorrect type (such as a FIFO file or a device file). diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt new file mode 100644 index 0000000..82c3168 --- /dev/null +++ b/Documentation/filesystems/caching/fscache.txt @@ -0,0 +1,151 @@ + ========================== + General Filesystem Caching + ========================== + +======== +OVERVIEW +======== + +This facility is a general purpose cache for network filesystems, though it +could be used for caching other things such as ISO9660 filesystems too. + +FS-Cache mediates between cache backends (such as CacheFS) and network +filesystems: + + +---------+ + | | +--------------+ + | NFS |--+ | | + | | | +-->| CacheFS | + +---------+ | +----------+ | | /dev/hda5 | + | | | | +--------------+ + +---------+ +-->| | | + | | | |--+ + | AFS |----->| FS-Cache | + | | | |--+ + +---------+ +-->| | | + | | | | +--------------+ + +---------+ | +----------+ | | | + | | | +-->| CacheFiles | + | ISOFS |--+ | /var/cache | + | | +--------------+ + +---------+ + + +FS-Cache does not follow the idea of completely loading every netfs file +opened in its entirety into a cache before permitting it to be accessed and +then serving the pages out of that cache rather than the netfs inode because: + + (1) It must be practical to operate without a cache. + + (2) The size of any accessible file must not be limited to the size of the + cache. + + (3) The combined size of all opened files (this includes mapped libraries) + must not be limited to the size of the cache. + + (4) The user should not be forced to download an entire file just to do a + one-off access of a small portion of it (such as might be done with the + "file" program). + +It instead serves the cache out in PAGE_SIZE chunks as and when requested by +the netfs('s) using it. + + +FS-Cache provides the following facilities: + + (1) More than one cache can be used at once. Caches can be selected + explicitly by use of tags. + + (2) Caches can be added / removed at any time. + + (3) The netfs is provided with an interface that allows either party to + withdraw caching facilities from a file (required for (2)). + + (4) The interface to the netfs returns as few errors as possible, preferring + rather to let the netfs remain oblivious. + + (5) Cookies are used to represent indices, files and other objects to the + netfs. The simplest cookie is just a NULL pointer - indicating nothing + cached there. + + (6) The netfs is allowed to propose - dynamically - any index hierarchy it + desires, though it must be aware that the index search function is + recursive, stack space is limited, and indices can only be children of + indices. + + (7) Data I/O is done direct to and from the netfs's pages. 
The netfs + indicates that page A is at index B of the data-file represented by cookie + C, and that it should be read or written. The cache backend may or may + not start I/O on that page, but if it does, a netfs callback will be + invoked to indicate completion. The I/O may be either synchronous or + asynchronous. + + (8) Cookies can be "retired" upon release. At this point FS-Cache will mark + them as obsolete and the index hierarchy rooted at that point will get + recycled. + + (9) The netfs provides a "match" function for index searches. In addition to + saying whether a match was made or not, this can also specify that an + entry should be updated or deleted. + + +FS-Cache maintains a virtual indexing tree in which all indices, files, objects +and pages are kept. Bits of this tree may actually reside in one or more +caches. + + FSDEF + | + +------------------------------------+ + | | + NFS AFS + | | + +--------------------------+ +-----------+ + | | | | + homedir mirror afs.org redhat.com + | | | + +------------+ +---------------+ +----------+ + | | | | | | + 00001 00002 00007 00125 vol00001 vol00002 + | | | | | + +---+---+ +-----+ +---+ +------+------+ +-----+----+ + | | | | | | | | | | | | | +PG0 PG1 PG2 PG0 XATTR PG0 PG1 DIRENT DIRENT DIRENT R/W R/O Bak + | | + PG0 +-------+ + | | + 00001 00003 + | + +---+---+ + | | | + PG0 PG1 PG2 + +In the example above, you can see two netfs's being backed: NFS and AFS. These +have different index hierarchies: + + (*) The NFS primary index contains per-server indices. Each server index is + indexed by NFS file handles to get data file objects. Each data file + objects can have an array of pages, but may also have further child + objects, such as extended attributes and directory entries. Extended + attribute objects themselves have page-array contents. + + (*) The AFS primary index contains per-cell indices. Each cell index contains + per-logical-volume indices. Each of volume index contains up to three + indices for the read-write, read-only and backup mirrors of those volumes. + Each of these contains vnode data file objects, each of which contains an + array of pages. + +The very top index is the FS-Cache master index in which individual netfs's +have entries. + +Any index object may reside in more than one cache, provided it only has index +children. Any index with non-index object children will be assumed to only +reside in one cache. + + +The netfs API to FS-Cache can be found in: + + Documentation/filesystems/caching/netfs-api.txt + +The cache backend API to FS-Cache can be found in: + + Documentation/filesystems/caching/backend-api.txt diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt new file mode 100644 index 0000000..a1a182b --- /dev/null +++ b/Documentation/filesystems/caching/netfs-api.txt @@ -0,0 +1,752 @@ + =============================== + FS-CACHE NETWORK FILESYSTEM API + =============================== + +There's an API by which a network filesystem can make use of the FS-Cache +facilities. This is based around a number of principles: + + (1) Caches can store a number of different object types. There are two main + object types: indices and files. The first is a special type used by + FS-Cache to make finding objects faster and to make retiring of groups of + objects easier. + + (2) Every index, file or other object is represented by a cookie. This cookie + may or may not have anything associated with it, but the netfs doesn't + need to care. 
+ + (3) Barring the top-level index (one entry per cached netfs), the index + hierarchy for each netfs is structured according the whim of the netfs. + +This API is declared in . + +This document contains the following sections: + + (1) Network filesystem definition + (2) Index definition + (3) Object definition + (4) Network filesystem (un)registration + (5) Cache tag lookup + (6) Index registration + (7) Data file registration + (8) Miscellaneous object registration + (9) Setting the data file size + (10) Page alloc/read/write + (11) Page uncaching + (12) Index and data file update + (13) Miscellaneous cookie operations + (14) Cookie unregistration + (15) Index and data file invalidation + + +============================= +NETWORK FILESYSTEM DEFINITION +============================= + +FS-Cache needs a description of the network filesystem. This is specified +using a record of the following structure: + + struct fscache_netfs { + uint32_t version; + const char *name; + struct fscache_netfs_operations *ops; + struct fscache_cookie *primary_index; + ... + }; + +This first three fields should be filled in before registration, and the fourth +will be filled in by the registration function; any other fields should just be +ignored and are for internal use only. + +The fields are: + + (1) The name of the netfs (used as the key in the toplevel index). + + (2) The version of the netfs (if the name matches but the version doesn't, the + entire in-cache hierarchy for this netfs will be scrapped and begun + afresh). + + (3) The operations table is defined as follows: + + struct fscache_netfs_operations { + }; + + Currently there aren't any functions here. + + (4) The cookie representing the primary index will be allocated according to + another parameter passed into the registration function. + +For example, kAFS (linux/fs/afs/) uses the following definitions to describe +itself: + + static struct fscache_netfs_operations afs_cache_ops = { + }; + + struct fscache_netfs afs_cache_netfs = { + .version = 0, + .name = "afs", + .ops = &afs_cache_ops, + }; + + +================ +INDEX DEFINITION +================ + +Indices are used for two purposes: + + (1) To aid the finding of a file based on a series of keys (such as AFS's + "cell", "volume ID", "vnode ID"). + + (2) To make it easier to discard a subset of all the files cached based around + a particular key - for instance to mirror the removal of an AFS volume. + +However, since it's unlikely that any two netfs's are going to want to define +their index hierarchies in quite the same way, FS-Cache tries to impose as few +restraints as possible on how an index is structured and where it is placed in +the tree. The netfs can even mix indices and data files at the same level, but +it's not recommended. + +Each index entry consists of a key of indeterminate length plus some auxilliary +data, also of indeterminate length. + +There are some limits on indices: + + (1) Any index containing non-index objects should be restricted to a single + cache. Any such objects created within an index will be created in the + first cache only. The cache in which an index is created can be + controlled by cache tags (see below). + + (2) The entry data must be atomically journallable, so it is limited to about + 400 bytes at present. At least 400 bytes will be available. + + (3) The depth of the index tree should be judged with care as the search + function is recursive. Too many layers will run the kernel out of stack. 
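As a concrete example of supplying such a key, the AFS cell index added later in this patch keys each entry on the cell name: its get_key operation (lightly abridged from the fs/afs/cell.c hunk further down, and described in the next section) simply copies the name into the buffer it is given, returning the length used or 0 if the name will not fit:

	static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
					       void *buffer, uint16_t bufmax)
	{
		const struct afs_cell *cell = cookie_netfs_data;
		uint16_t klen;

		klen = strlen(cell->name);
		if (klen > bufmax)
			return 0;

		memcpy(buffer, cell->name, klen);
		return klen;
	}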
+ + +================= +OBJECT DEFINITION +================= + +To define an object, a structure of the following type should be filled out: + + struct fscache_object_def + { + uint8_t name[16]; + uint8_t type; + + struct fscache_cache_tag *(*select_cache)( + const void *parent_netfs_data, + const void *cookie_netfs_data); + + uint16_t (*get_key)(const void *cookie_netfs_data, + void *buffer, + uint16_t bufmax); + + void (*get_attr)(const void *cookie_netfs_data, + uint64_t *size); + + uint16_t (*get_aux)(const void *cookie_netfs_data, + void *buffer, + uint16_t bufmax); + + fscache_checkaux_t (*check_aux)(void *cookie_netfs_data, + const void *data, + uint16_t datalen); + + void (*get_context)(void *cookie_netfs_data, void *context); + + void (*put_context)(void *cookie_netfs_data, void *context); + + void (*mark_pages_cached)(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec); + + void (*now_uncached)(void *cookie_netfs_data); + }; + +This has the following fields: + + (1) The type of the object [mandatory]. + + This is one of the following values: + + (*) FSCACHE_COOKIE_TYPE_INDEX + + This defines an index, which is a special FS-Cache type. + + (*) FSCACHE_COOKIE_TYPE_DATAFILE + + This defines an ordinary data file. + + (*) Any other value between 2 and 255 + + This defines an extraordinary object such as an XATTR. + + (2) The name of the object type (NUL terminated unless all 16 chars are used) + [optional]. + + (3) A function to select the cache in which to store an index [optional]. + + This function is invoked when an index needs to be instantiated in a cache + during the instantiation of a non-index object. Only the immediate index + parent for the non-index object will be queried. Any indices above that + in the hierarchy may be stored in multiple caches. This function does not + need to be supplied for any non-index object or any index that will only + have index children. + + If this function is not supplied or if it returns NULL then the first + cache in the parent's list will be chosed, or failing that, the first + cache in the master list. + + (4) A function to retrieve an object's key from the netfs [mandatory]. + + This function will be called with the netfs data that was passed to the + cookie acquisition function and the maximum length of key data that it may + provide. It should write the required key data into the given buffer and + return the quantity it wrote. + + (5) A function to retrieve attribute data from the netfs [optional]. + + This function will be called with the netfs data that was passed to the + cookie acquisition function. It should return the size of the file if + this is a data file. The size may be used to govern how much cache must + be reserved for this file in the cache. + + If the function is absent, a file size of 0 is assumed. + + (6) A function to retrieve auxilliary data from the netfs [optional]. + + This function will be called with the netfs data that was passed to the + cookie acquisition function and the maximum length of auxilliary data that + it may provide. It should write the auxilliary data into the given buffer + and return the quantity it wrote. + + If this function is absent, the auxilliary data length will be set to 0. + + The length of the auxilliary data buffer may be dependent on the key + length. A netfs mustn't rely on being able to provide more than 400 bytes + for both. + + (7) A function to check the auxilliary data [optional]. 
+ + This function will be called to check that a match found in the cache for + this object is valid. For instance with AFS it could check the auxilliary + data against the data version number returned by the server to determine + whether the index entry in a cache is still valid. + + If this function is absent, it will be assumed that matching objects in a + cache are always valid. + + If present, the function should return one of the following values: + + (*) FSCACHE_CHECKAUX_OKAY - the entry is okay as is + (*) FSCACHE_CHECKAUX_NEEDS_UPDATE - the entry requires update + (*) FSCACHE_CHECKAUX_OBSOLETE - the entry should be deleted + + This function can also be used to extract data from the auxilliary data in + the cache and copy it into the netfs's structures. + + (8) A pair of functions to manage contexts for the completion callback + [optional]. + + The cache read/write functions are passed a context which is then passed + to the I/O completion callback function. To ensure this context remains + valid until after the I/O completion is called, two functions may be + provided: one to get an extra reference on the context, and one to drop a + reference to it. + + If the context is not used or is a type of object that won't go out of + scope, then these functions are not required. These functions are not + required for indices as indices may not contain data. These functions may + be called in interrupt context and so may not sleep. + + (9) A function to mark a page as retaining cache metadata [mandatory]. + + This is called by the cache to indicate that it is retaining in-memory + information for this page and that the netfs should uncache the page when + it has finished. This does not indicate whether there's data on the disk + or not. Note that several pages at once may be presented for marking. + + kAFS and NFS use the PG_private bit on the page structure for this, but + that may not be appropriate in all cases. + + This function is not required for indices as they're not permitted data. + +(10) A function to unmark all the pages retaining cache metadata [mandatory]. + + This is called by FS-Cache to indicate that a backing store is being + unbound from a cookie and that all the marks on the pages should be + cleared to prevent confusion. Note that the cache will have torn down all + its tracking information so that the pages don't need to be explicitly + uncached. + + This function is not required for indices as they're not permitted data. + + +=================================== +NETWORK FILESYSTEM (UN)REGISTRATION +=================================== + +The first step is to declare the network filesystem to the cache. This also +involves specifying the layout of the primary index (for AFS, this would be the +"cell" level). + +The registration function is: + + int fscache_register_netfs(struct fscache_netfs *netfs); + +It just takes a pointer to the netfs definition. It returns 0 or an error as +appropriate. + +For kAFS, registration is done as follows: + + ret = fscache_register_netfs(&afs_cache_netfs); + +The last step is, of course, unregistration: + + void fscache_unregister_netfs(struct fscache_netfs *netfs); + + +================ +CACHE TAG LOOKUP +================ + +FS-Cache permits the use of more than one cache. To permit particular index +subtrees to be bound to particular caches, the second step is to look up cache +representation tags. This step is optional; it can be left entirely up to +FS-Cache as to which cache should be used. 
The problem with doing that is that +FS-Cache will always pick the first cache that was registered. + +To get the representation for a named tag: + + struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name); + +This takes a text string as the name and returns a representation of a tag. It +will never return an error. It may return a dummy tag, however, if it runs out +of memory; this will inhibit caching with this tag. + +Any representation so obtained must be released by passing it to this function: + + void fscache_release_cache_tag(struct fscache_cache_tag *tag); + +The tag will be retrieved by FS-Cache when it calls the object definition +operation select_cache(). + + +================== +INDEX REGISTRATION +================== + +The third step is to inform FS-Cache about part of an index hierarchy that can +be used to locate files. This is done by requesting a cookie for each index in +the path to the file: + + struct fscache_cookie * + fscache_acquire_cookie(struct fscache_cookie *parent, + struct fscache_object_def *def, + void *netfs_data); + +This function creates an index entry in the index represented by parent, +filling in the index entry by calling the operations pointed to by def. + +Note that this function never returns an error - all errors are handled +internally. It may also return NULL to indicate no cookie. It is quite +acceptable to pass this token back to this function as the parent to another +acquisition (or even to the relinquish cookie, read page and write page +functions - see below). + +Note also that no indices are actually created in a cache until a non-index +object needs to be created somewhere down the hierarchy. Furthermore, an index +may be created in several different caches independently at different times. +This is all handled transparently, and the netfs doesn't see any of it. + +For example, with AFS, a cell would be added to the primary index. This index +entry would have a dependent inode containing a volume location index for the +volume mappings within this cell: + + cell->cache = + fscache_acquire_cookie(afs_cache_netfs.primary_index, + &afs_cell_cache_index_def, + cell); + +Then when a volume location was accessed, it would be entered into the cell's +index and an inode would be allocated that acts as a volume type and hash chain +combination: + + vlocation->cache = + fscache_acquire_cookie(cell->cache, + &afs_vlocation_cache_index_def, + vlocation); + +And then a particular flavour of volume (R/O for example) could be added to +that index, creating another index for vnodes (AFS inode equivalents): + + volume->cache = + fscache_acquire_cookie(vlocation->cache, + &afs_volume_cache_index_def, + volume); + + +====================== +DATA FILE REGISTRATION +====================== + +The fourth step is to request a data file be created in the cache. This is +identical to index cookie acquisition. The only difference is that the type in +the object definition should be something other than index type. + + vnode->cache = + fscache_acquire_cookie(volume->cache, + &afs_vnode_cache_object_def, + vnode); + + +================================= +MISCELLANEOUS OBJECT REGISTRATION +================================= + +An optional step is to request an object of miscellaneous type be created in +the cache. This is almost identical to index cookie acquisition. The only +difference is that the type in the object definition should be something other +than index type. 
Whilst the parent object could be an index, it's more likely +it would be some other type of object such as a data file. + + xattr->cache = + fscache_acquire_cookie(vnode->cache, + &afs_xattr_cache_object_def, + xattr); + +Miscellaneous objects might be used to store extended attributes or directory +entries for example. + + +========================== +SETTING THE DATA FILE SIZE +========================== + +The fifth step is to set the size of the file. This doesn't automatically +reserve any space in the cache, but permits the cache to adjust its metadata +for data tracking appropriately: + + int fscache_set_i_size(struct fscache_cookie *cookie, loff_t i_size); + +The cache will return -ENOBUFS if there is no backing cache or if there is no +space to allocate any extra metadata required in the cache. + +Note that attempts to read or write data pages in the cache over this size may +be rebuffed with -ENOBUFS. + + +===================== +PAGE READ/ALLOC/WRITE +===================== + +And the sixth step is to store and retrieve pages in the cache. There are +three functions that are used to do this. + +Note: + + (1) A page should not be re-read or re-allocated without uncaching it first. + + (2) A read or allocated page must be uncached when the netfs page is released + from the pagecache. + + (3) A page should only be written to the cache if previous read or allocated. + +This permits the cache to maintain its page tracking in proper order. + + +PAGE READ +--------- + +Firstly, the netfs should ask FS-Cache to examine the caches and read the +contents cached for a particular page of a particular file if present, or else +allocate space to store the contents if not: + + typedef + void (*fscache_rw_complete_t)(struct page *page, + void *context, + int error); + + int fscache_read_or_alloc_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *end_io_data, + gfp_t gfp); + +The cookie argument must specify a cookie for an object that isn't an index, +the page specified will have the data loaded into it (and is also used to +specify the page number), and the gfp argument is used to control how any +memory allocations made are satisfied. + +If the cookie indicates the inode is not cached: + + (1) The function will return -ENOBUFS. + +Else if there's a copy of the page resident in the cache: + + (1) The mark_pages_cached() cookie operation will be called on that page. + + (2) The function will submit a request to read the data from the cache's + backing device directly into the page specified. + + (3) The function will return 0. + + (4) When the read is complete, end_io_func() will be invoked with: + + (*) The netfs data supplied when the cookie was created. + + (*) The page descriptor. + + (*) The context argument passed to the above function. This will be + maintained with the get_context/put_context functions mentioned above. + + (*) An argument that's 0 on success or negative for an error code. + + If an error occurs, it should be assumed that the page contains no usable + data. + + end_io_func() will be called in process context if the read is results in + an error, but it might be called in interrupt context if the read is + successful. + +Otherwise, if there's not a copy available in cache, but the cache may be able +to store the page: + + (1) The mark_pages_cached() cookie operation will be called on that page. + + (2) A block may be reserved in the cache and attached to the object at the + appropriate place. 
+ + (3) The function will return -ENODATA. + +This function may also return -ENOMEM or -EINTR, in which case it won't have +read any data from the cache. + + +PAGE ALLOCATE +------------- + +Alternatively, if there's not expected to be any data in the cache for a page +because the file has been extended, a block can simply be allocated instead: + + int fscache_alloc_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp); + +This is similar to the fscache_read_or_alloc_page() function, except that it +never reads from the cache. It will return 0 if a block has been allocated, +rather than -ENODATA as the other would. One or the other must be performed +before writing to the cache. + +The mark_pages_cached() cookie operation will be called on the page if +successful. + + +PAGE WRITE +---------- + +Secondly, if the netfs changes the contents of the page (either due to an +initial download or if a user performs a write), then the page should be +written back to the cache: + + int fscache_write_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); + +The cookie argument must specify a data file cookie, the page specified should +contain the data to be written (and is also used to specify the page number), +and the gfp argument is used to control how any memory allocations made are +satisfied. + +The page must have first been read or allocated successfully and must not have +been uncached before writing is performed. + +If the cookie indicates the inode is not cached then: + + (1) The function will return -ENOBUFS. + +Else if space can be allocated in the cache to hold this page: + + (1) The function will submit a request to write the data to cache's backing + device directly from the page specified. + + (2) The function will return 0. + + (3) When the write is complete the end_io_func() will be invoked with: + + (*) The netfs data supplied when the cookie was created. + + (*) The page descriptor. + + (*) The context argument passed to the function. This will be maintained + with the get_context/put_context functions mentioned above. + + (*) An argument that's 0 on success or negative for an error. + + If an error occurs, it can be assumed that the page has not been written + to the cache, and that either there's a block containing the old data or + no block at all in the cache. + + end_io_func() might be called in interrupt context. + +Else if there's no space available in the cache, -ENOBUFS will be returned. + + +MULTIPLE PAGE READ +------------------ + +A facility is provided to read several pages at once, as requested by the +readpages() address space operation: + + int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, + struct address_space *mapping, + struct list_head *pages, + int *nr_pages, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); + +This works in a similar way to fscache_read_or_alloc_page(), except: + + (1) Any page it can retrieve data for is removed from pages and nr_pages and + dispatched for reading to the disk. Reads of adjacent pages on disk may + be merged for greater efficiency. + + (2) The mark_pages_cached() cookie operation will be called on several pages + at once if they're being read or allocated. + + (3) If there was an general error, then that error will be returned. + + Else if some pages couldn't be allocated or read, then -ENOBUFS will be + returned. + + Else if some pages couldn't be read but were allocated, then -ENODATA will + be returned. 
+ + Otherwise, if all pages had reads dispatched, then 0 will be returned, the + list will be empty and *nr_pages will be 0. + + (4) end_io_func will be called once for each page being read as the reads + complete. It will be called in process context if error != 0, but it may + be called in interrupt context if there is no error. + +Note that a return of -ENODATA, -ENOBUFS or any other error does not preclude +some of the pages being read and some being allocated. Those pages will have +been marked appropriately and will need uncaching. + + +============== +PAGE UNCACHING +============== + +To uncache a page, this function should be called: + + void fscache_uncache_page(struct fscache_cookie *cookie, + struct page *page); + +This function permits the cache to release any in-memory representation it +might be holding for this netfs page. This function must be called once for +each page on which the read or write page functions above have been called to +make sure the cache's in-memory tracking information gets torn down. + +Note that pages can't be explicitly deleted from the a data file. The whole +data file must be retired (see the relinquish cookie function below). + +Furthermore, note that this does not cancel the asynchronous read or write +operation started by the read/alloc and write functions. + +There is another unbinding operation similar to the above that takes a set of +pages to unbind in one go: + + void fscache_uncache_pagevec(struct fscache_cookie *cookie, + struct pagevec *pagevec); + + +========================== +INDEX AND DATA FILE UPDATE +========================== + +To request an update of the index data for an index or other object, the +following function should be called: + + void fscache_update_cookie(struct fscache_cookie *cookie); + +This function will refer back to the netfs_data pointer stored in the cookie by +the acquisition function to obtain the data to write into each revised index +entry. The update method in the parent index definition will be called to +transfer the data. + +Note that partial updates may happen automatically at other times, such as when +data blocks are added to a data file object. + + +=============================== +MISCELLANEOUS COOKIE OPERATIONS +=============================== + +There are a number of operations that can be used to control cookies: + + (*) Cookie pinning: + + int fscache_pin_cookie(struct fscache_cookie *cookie); + void fscache_unpin_cookie(struct fscache_cookie *cookie); + + These operations permit data cookies to be pinned into the cache and to + have the pinning removed. They are not permitted on index cookies. + + The pinning function will return 0 if successful, -ENOBUFS in the cookie + isn't backed by a cache, -EOPNOTSUPP if the cache doesn't support pinning, + -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or + -EIO if there's any other problem. + + (*) Data space reservation: + + int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size); + + This permits a netfs to request cache space be reserved to store up to the + given amount of a file. It is permitted to ask for more than the current + size of the file to allow for future file expansion. + + If size is given as zero then the reservation will be cancelled. + + The function will return 0 if successful, -ENOBUFS in the cookie isn't + backed by a cache, -EOPNOTSUPP if the cache doesn't support reservations, + -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or + -EIO if there's any other problem. 
+ + Note that this doesn't pin an object in a cache; it can still be culled to + make space if it's not in use. + + +===================== +COOKIE UNREGISTRATION +===================== + +To get rid of a cookie, this function should be called. + + void fscache_relinquish_cookie(struct fscache_cookie *cookie, + int retire); + +If retire is non-zero, then the object will be marked for recycling, and all +copies of it will be removed from all active caches in which it is present. +Not only that but all child objects will also be retired. + +If retire is zero, then the object may be available again when next the +acquisition function is called. Retirement here will overrule the pinning on a +cookie. + +One very important note - relinquish must NOT be called for a cookie unless all +the cookies for "child" indices, objects and pages have been relinquished +first. + + +================================ +INDEX AND DATA FILE INVALIDATION +================================ + +There is no direct way to invalidate an index subtree or a data file. To do +this, the caller should relinquish and retire the cookie they have, and then +acquire a new one. diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 0b62c62..ead15f0 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -71,7 +71,7 @@ you might want to raise the limit. ============================================================== -file-max & file-nr: +file-max, file-nr & file-kernel: The kernel allocates file handles dynamically, but as yet it doesn't free them again. @@ -88,6 +88,10 @@ close to the maximum, but the number of significantly greater than 0, you've encountered a peak in your usage of file handles and you don't need to increase the maximum. +The value in file-kernel denotes the number of internal file handles +that the kernel has open. These do not contribute to ENFILE +accounting. + ============================================================== inode-max, inode-nr & inode-state: diff --git a/fs/Kconfig b/fs/Kconfig index 53f5c6d..12e77a1 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -530,6 +530,41 @@ config FUSE_FS If you want to develop a userspace FS, or if you want to use a filesystem based on FUSE, answer Y or M. +menu "Caches" + +config FSCACHE + tristate "General filesystem cache manager" + depends on EXPERIMENTAL + help + This option enables a generic filesystem caching manager that can be + used by various network and other filesystems to cache data + locally. Different sorts of caches can be plugged in, depending on the + resources available. + + See Documentation/filesystems/caching/fscache.txt for more information. + +config CACHEFILES + tristate "Filesystem caching on files" + depends on FSCACHE + help + This permits use of a mounted filesystem as a cache for other + filesystems - primarily networking filesystems - thus allowing fast + local disk to enhance the speed of slower devices. + + See Documentation/filesystems/caching/cachefiles.txt for more + information. + +config CACHEFILES_DEBUG + bool "Debug CacheFiles" + depends on CACHEFILES + help + This permits debugging to be dynamically enabled in the filesystem + caching on files module. If this is set, the debugging output may be + enabled by setting bits in /proc/sys/fs/cachefiles/debug or by + including a debugging specifier in /etc/cachefilesd.conf. + +endmenu + menu "CD-ROM/DVD Filesystems" config ISO9660_FS @@ -1470,6 +1505,13 @@ config NFS_V4 If unsure, say N. 
+config NFS_FSCACHE + bool "Provide NFS client caching support (EXPERIMENTAL)" + depends on NFS_FS && FSCACHE && EXPERIMENTAL + help + Say Y here if you want NFS data to be cached locally on disc through + the general filesystem cache manager + config NFS_DIRECTIO bool "Allow direct I/O on NFS files (EXPERIMENTAL)" depends on NFS_FS && EXPERIMENTAL @@ -1906,6 +1948,13 @@ # for fs/nls/Config.in If unsure, say N. +config AFS_FSCACHE + bool "Provide AFS client caching support" + depends on AFS_FS && FSCACHE && EXPERIMENTAL + help + Say Y here if you want AFS data to be cached locally on disc through the + generic filesystem cache manager + config RXRPC tristate diff --git a/fs/Makefile b/fs/Makefile index 8913542..e5efb4b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -52,6 +52,7 @@ obj-y += devpts/ obj-$(CONFIG_PROFILING) += dcookies.o # Do not add any filesystems before this line +obj-$(CONFIG_FSCACHE) += fscache/ obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 obj-$(CONFIG_JBD) += jbd/ @@ -100,5 +101,6 @@ obj-$(CONFIG_AFS_FS) += afs/ obj-$(CONFIG_BEFS_FS) += befs/ obj-$(CONFIG_HOSTFS) += hostfs/ obj-$(CONFIG_HPPFS) += hppfs/ +obj-$(CONFIG_CACHEFILES) += cachefiles/ obj-$(CONFIG_DEBUG_FS) += debugfs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ diff --git a/fs/afs/cache.h b/fs/afs/cache.h deleted file mode 100644 index 9eb7722..0000000 --- a/fs/afs/cache.h +++ /dev/null @@ -1,27 +0,0 @@ -/* cache.h: AFS local cache management interface - * - * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#ifndef _LINUX_AFS_CACHE_H -#define _LINUX_AFS_CACHE_H - -#undef AFS_CACHING_SUPPORT - -#include -#ifdef AFS_CACHING_SUPPORT -#include -#endif -#include "types.h" - -#ifdef __KERNEL__ - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_AFS_CACHE_H */ diff --git a/fs/afs/cell.c b/fs/afs/cell.c index bfc1fd2..3aaeada 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -31,17 +31,21 @@ static DEFINE_RWLOCK(afs_cells_lock); static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ static struct afs_cell *afs_cell_root; -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_cell_cache_match(void *target, - const void *entry); -static void afs_cell_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_cache_cell_index_def = { - .name = "cell_ix", - .data_size = sizeof(struct afs_cache_cell), - .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 }, - .match = afs_cell_cache_match, - .update = afs_cell_cache_update, +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static fscache_checkaux_t afs_cell_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen); + +static struct fscache_cookie_def afs_cell_cache_index_def = { + .name = "AFS cell", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = afs_cell_cache_get_key, + .get_aux = afs_cell_cache_get_aux, + .check_aux = afs_cell_cache_check_aux, }; #endif @@ -115,12 +119,11 @@ int afs_cell_create(const char *name, ch if (ret < 0) goto error; -#ifdef AFS_CACHING_SUPPORT - /* put it up for caching */ - cachefs_acquire_cookie(afs_cache_netfs.primary_index, - &afs_vlocation_cache_index_def, - cell, - &cell->cache); +#ifdef CONFIG_AFS_FSCACHE + /* put it up for caching (this never returns an error) */ + cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, + &afs_cell_cache_index_def, + cell); #endif /* add to the cell lists */ @@ -345,8 +348,8 @@ static void afs_cell_destroy(struct afs_ list_del_init(&cell->proc_link); up_write(&afs_proc_cells_sem); -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(cell->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(cell->cache, 0); #endif up_write(&afs_cells_sem); @@ -525,44 +528,62 @@ void afs_cell_purge(void) /*****************************************************************************/ /* - * match a cell record obtained from the cache + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_cell_cache_match(void *target, - const void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct afs_cache_cell *ccell = entry; - struct afs_cell *cell = target; + const struct afs_cell *cell = cookie_netfs_data; + uint16_t klen; - _enter("{%s},{%s}", ccell->name, cell->name); + _enter("%p,%p,%u", cell, buffer, bufmax); - if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) { - _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; - } + klen = strlen(cell->name); + if (klen > bufmax) + return 0; + + memcpy(buffer, cell->name, klen); + return klen; - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; -} /* end afs_cell_cache_match() */ +} /* end afs_cell_cache_get_key() */ #endif /*****************************************************************************/ /* - * update a cell record in the cache + * 
provide new auxilliary cache data */ -#ifdef AFS_CACHING_SUPPORT -static void afs_cell_cache_update(void *source, void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - struct afs_cache_cell *ccell = entry; - struct afs_cell *cell = source; + const struct afs_cell *cell = cookie_netfs_data; + uint16_t dlen; - _enter("%p,%p", source, entry); + _enter("%p,%p,%u", cell, buffer, bufmax); - strncpy(ccell->name, cell->name, sizeof(ccell->name)); + dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]); + dlen = min(dlen, bufmax); + dlen &= ~(sizeof(cell->vl_addrs[0]) - 1); - memcpy(ccell->vl_servers, - cell->vl_addrs, - min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs))); + memcpy(buffer, cell->vl_addrs, dlen); + + return dlen; + +} /* end afs_cell_cache_get_aux() */ +#endif + +/*****************************************************************************/ +/* + * check that the auxilliary data indicates that the entry is still valid + */ +#ifdef CONFIG_AFS_FSCACHE +static fscache_checkaux_t afs_cell_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen) +{ + _leave(" = OKAY"); + return FSCACHE_CHECKAUX_OKAY; -} /* end afs_cell_cache_update() */ +} /* end afs_cell_cache_check_aux() */ #endif diff --git a/fs/afs/cell.h b/fs/afs/cell.h index 4834910..d670502 100644 --- a/fs/afs/cell.h +++ b/fs/afs/cell.h @@ -13,7 +13,7 @@ #ifndef _LINUX_AFS_CELL_H #define _LINUX_AFS_CELL_H #include "types.h" -#include "cache.h" +#include #define AFS_CELL_MAX_ADDRS 15 @@ -21,16 +21,6 @@ extern volatile int afs_cells_being_purg /*****************************************************************************/ /* - * entry in the cached cell catalogue - */ -struct afs_cache_cell -{ - char name[64]; /* cell name (padded with NULs) */ - struct in_addr vl_servers[15]; /* cached cell VL servers */ -}; - -/*****************************************************************************/ -/* * AFS cell record */ struct afs_cell @@ -39,8 +29,8 @@ struct afs_cell struct list_head link; /* main cell list link */ struct list_head proc_link; /* /proc cell list link */ struct proc_dir_entry *proc_dir; /* /proc dir for this cell */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif /* server record management */ diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 3d097fd..f87d5a7 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -24,7 +24,7 @@ #include "cmservice.h" #include "internal.h" static unsigned afscm_usage; /* AFS cache manager usage count */ -static struct rw_semaphore afscm_sem; /* AFS cache manager start/stop semaphore */ +static DECLARE_RWSEM(afscm_sem); /* AFS cache manager start/stop semaphore */ static int afscm_new_call(struct rxrpc_call *call); static void afscm_attention(struct rxrpc_call *call); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 2fc9987..9800a07 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -145,7 +145,7 @@ #endif qty /= sizeof(union afs_dir_block); /* check them */ - dbuf = page_address(page); + dbuf = kmap_atomic(page, KM_USER0); for (tmp = 0; tmp < qty; tmp++) { if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n", @@ -154,12 +154,12 @@ #endif goto error; } } + kunmap_atomic(dbuf, KM_USER0); - SetPageChecked(page); return; error: - SetPageChecked(page); + kunmap_atomic(dbuf, KM_USER0); 
SetPageError(page); } /* end afs_dir_check_page() */ @@ -170,7 +170,6 @@ #endif */ static inline void afs_dir_put_page(struct page *page) { - kunmap(page); page_cache_release(page); } /* end afs_dir_put_page() */ @@ -188,11 +187,9 @@ static struct page *afs_dir_get_page(str page = read_mapping_page(dir->i_mapping, index, NULL); if (!IS_ERR(page)) { wait_on_page_locked(page); - kmap(page); if (!PageUptodate(page)) goto fail; - if (!PageChecked(page)) - afs_dir_check_page(dir, page); + afs_dir_check_page(dir, page); if (PageError(page)) goto fail; } @@ -357,7 +354,7 @@ static int afs_dir_iterate(struct inode limit = blkoff & ~(PAGE_SIZE - 1); - dbuf = page_address(page); + dbuf = kmap_atomic(page, KM_USER0); /* deal with the individual blocks stashed on this page */ do { @@ -366,6 +363,7 @@ static int afs_dir_iterate(struct inode ret = afs_dir_iterate_block(fpos, dblock, blkoff, cookie, filldir); if (ret != 1) { + kunmap_atomic(dbuf, KM_USER0); afs_dir_put_page(page); goto out; } @@ -374,6 +372,7 @@ static int afs_dir_iterate(struct inode } while (*fpos < dir->i_size && blkoff < limit); + kunmap_atomic(dbuf, KM_USER0); afs_dir_put_page(page); ret = 0; } diff --git a/fs/afs/file.c b/fs/afs/file.c index 67d6634..e8e3680 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -16,12 +16,15 @@ #include #include #include #include +#include #include #include "volume.h" #include "vnode.h" #include #include "internal.h" +#define list_to_page(head) (list_entry((head)->prev, struct page, lru)) + #if 0 static int afs_file_open(struct inode *inode, struct file *file); static int afs_file_release(struct inode *inode, struct file *file); @@ -30,34 +33,74 @@ #endif static int afs_file_readpage(struct file *file, struct page *page); static void afs_file_invalidatepage(struct page *page, unsigned long offset); static int afs_file_releasepage(struct page *page, gfp_t gfp_flags); +static int afs_file_mmap(struct file * file, struct vm_area_struct * vma); + +#ifdef CONFIG_AFS_FSCACHE +static int afs_file_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages); +static int afs_file_page_mkwrite(struct vm_area_struct *vma, struct page *page); +#endif struct inode_operations afs_file_inode_operations = { .getattr = afs_inode_getattr, }; +const struct file_operations afs_file_file_operations = { + .llseek = generic_file_llseek, + .read = generic_file_read, + .mmap = afs_file_mmap, + .sendfile = generic_file_sendfile, +}; + const struct address_space_operations afs_fs_aops = { .readpage = afs_file_readpage, +#ifdef CONFIG_AFS_FSCACHE + .readpages = afs_file_readpages, +#endif .sync_page = block_sync_page, .set_page_dirty = __set_page_dirty_nobuffers, .releasepage = afs_file_releasepage, .invalidatepage = afs_file_invalidatepage, }; +static struct vm_operations_struct afs_fs_vm_operations = { + .nopage = filemap_nopage, + .populate = filemap_populate, +#ifdef CONFIG_AFS_FSCACHE + .page_mkwrite = afs_file_page_mkwrite, +#endif +}; + +/*****************************************************************************/ +/* + * set up a memory mapping on an AFS file + * - we set our own VMA ops so that we can catch the page becoming writable for + * userspace for shared-writable mmap + */ +static int afs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + _enter(""); + + file_accessed(file); + vma->vm_ops = &afs_fs_vm_operations; + return 0; + +} /* end afs_file_mmap() */ + /*****************************************************************************/ /* * deal with 
notification that a page was read from the cache */ -#ifdef AFS_CACHING_SUPPORT -static void afs_file_readpage_read_complete(void *cookie_data, - struct page *page, +#ifdef CONFIG_AFS_FSCACHE +static void afs_file_readpage_read_complete(struct page *page, void *data, int error) { - _enter("%p,%p,%p,%d", cookie_data, page, data, error); + _enter("%p,%p,%d", page, data, error); - if (error) - SetPageError(page); - else + /* if the read completes with an error, we just unlock the page and let + * the VM reissue the readpage */ + if (!error) SetPageUptodate(page); unlock_page(page); @@ -68,15 +111,16 @@ #endif /* * deal with notification that a page was written to the cache */ -#ifdef AFS_CACHING_SUPPORT -static void afs_file_readpage_write_complete(void *cookie_data, - struct page *page, +#ifdef CONFIG_AFS_FSCACHE +static void afs_file_readpage_write_complete(struct page *page, void *data, int error) { - _enter("%p,%p,%p,%d", cookie_data, page, data, error); + _enter("%p,%p,%d", page, data, error); - unlock_page(page); + /* note that the page has been written to the cache and can now be + * modified */ + end_page_fs_misc(page); } /* end afs_file_readpage_write_complete() */ #endif @@ -88,16 +132,13 @@ #endif static int afs_file_readpage(struct file *file, struct page *page) { struct afs_rxfs_fetch_descriptor desc; -#ifdef AFS_CACHING_SUPPORT - struct cachefs_page *pageio; -#endif struct afs_vnode *vnode; struct inode *inode; int ret; inode = page->mapping->host; - _enter("{%lu},{%lu}", inode->i_ino, page->index); + _enter("{%lu},%p{%lu}", inode->i_ino, page, page->index); vnode = AFS_FS_I(inode); @@ -107,13 +148,9 @@ #endif if (vnode->flags & AFS_VNODE_DELETED) goto error; -#ifdef AFS_CACHING_SUPPORT - ret = cachefs_page_get_private(page, &pageio, GFP_NOIO); - if (ret < 0) - goto error; - +#ifdef CONFIG_AFS_FSCACHE /* is it cached? 
*/ - ret = cachefs_read_or_alloc_page(vnode->cache, + ret = fscache_read_or_alloc_page(vnode->cache, page, afs_file_readpage_read_complete, NULL, @@ -123,18 +160,20 @@ #else #endif switch (ret) { - /* read BIO submitted and wb-journal entry found */ - case 1: - BUG(); // TODO - handle wb-journal match - /* read BIO submitted (page in cache) */ case 0: break; - /* no page available in cache */ - case -ENOBUFS: + /* page not yet cached */ case -ENODATA: + _debug("cache said ENODATA"); + goto go_on; + + /* page will not be cached */ + case -ENOBUFS: + _debug("cache said ENOBUFS"); default: + go_on: desc.fid = vnode->fid; desc.offset = page->index << PAGE_CACHE_SHIFT; desc.size = min((size_t) (inode->i_size - desc.offset), @@ -148,34 +187,40 @@ #endif ret = afs_vnode_fetch_data(vnode, &desc); kunmap(page); if (ret < 0) { - if (ret==-ENOENT) { - _debug("got NOENT from server" + if (ret == -ENOENT) { + kdebug("got NOENT from server" " - marking file deleted and stale"); vnode->flags |= AFS_VNODE_DELETED; ret = -ESTALE; } -#ifdef AFS_CACHING_SUPPORT - cachefs_uncache_page(vnode->cache, page); +#ifdef CONFIG_AFS_FSCACHE + fscache_uncache_page(vnode->cache, page); + ClearPagePrivate(page); #endif goto error; } SetPageUptodate(page); -#ifdef AFS_CACHING_SUPPORT - if (cachefs_write_page(vnode->cache, - page, - afs_file_readpage_write_complete, - NULL, - GFP_KERNEL) != 0 - ) { - cachefs_uncache_page(vnode->cache, page); - unlock_page(page); + /* send the page to the cache */ +#ifdef CONFIG_AFS_FSCACHE + if (PagePrivate(page)) { + if (TestSetPageFsMisc(page)) + BUG(); + if (fscache_write_page(vnode->cache, + page, + afs_file_readpage_write_complete, + NULL, + GFP_KERNEL) != 0 + ) { + fscache_uncache_page(vnode->cache, page); + ClearPagePrivate(page); + end_page_fs_misc(page); + } } -#else - unlock_page(page); #endif + unlock_page(page); } _leave(" = 0"); @@ -192,20 +237,63 @@ #endif /*****************************************************************************/ /* - * get a page cookie for the specified page + * read a set of pages */ -#ifdef AFS_CACHING_SUPPORT -int afs_cache_get_page_cookie(struct page *page, - struct cachefs_page **_page_cookie) +#ifdef CONFIG_AFS_FSCACHE +static int afs_file_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) { - int ret; + struct afs_vnode *vnode; +#if 0 + struct pagevec lru_pvec; + unsigned page_idx; +#endif + int ret = 0; - _enter(""); - ret = cachefs_page_get_private(page,_page_cookie, GFP_NOIO); + _enter(",{%lu},,%d", mapping->host->i_ino, nr_pages); - _leave(" = %d", ret); + vnode = AFS_FS_I(mapping->host); + if (vnode->flags & AFS_VNODE_DELETED) { + _leave(" = -ESTALE"); + return -ESTALE; + } + + /* attempt to read as many of the pages as possible */ + ret = fscache_read_or_alloc_pages(vnode->cache, + mapping, + pages, + &nr_pages, + afs_file_readpage_read_complete, + NULL, + mapping_gfp_mask(mapping)); + + switch (ret) { + /* all pages are being read from the cache */ + case 0: + BUG_ON(!list_empty(pages)); + BUG_ON(nr_pages != 0); + _leave(" = 0 [reading all]"); + return 0; + + /* there were pages that couldn't be read from the cache */ + case -ENODATA: + case -ENOBUFS: + break; + + /* other error */ + default: + _leave(" = %d", ret); + return ret; + } + + /* load the missing pages from the network */ + ret = read_cache_pages(mapping, pages, + (void *) afs_file_readpage, NULL); + + _leave(" = %d [netting]", ret); return ret; -} /* end afs_cache_get_page_cookie() */ + +} /* end afs_file_readpages() */ 
#endif /*****************************************************************************/ @@ -214,35 +302,22 @@ #endif */ static void afs_file_invalidatepage(struct page *page, unsigned long offset) { - int ret = 1; - _enter("{%lu},%lu", page->index, offset); BUG_ON(!PageLocked(page)); if (PagePrivate(page)) { -#ifdef AFS_CACHING_SUPPORT - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - cachefs_uncache_page(vnode->cache,page); -#endif - /* We release buffers only if the entire page is being * invalidated. * The get_block cached value has been unconditionally * invalidated, so real IO is not possible anymore. */ - if (offset == 0) { - BUG_ON(!PageLocked(page)); - - ret = 0; - if (!PageWriteback(page)) - ret = page->mapping->a_ops->releasepage(page, - 0); - /* possibly should BUG_ON(!ret); - neilb */ - } + if (offset == 0 && !PageWriteback(page)) + page->mapping->a_ops->releasepage(page, 0); } - _leave(" = %d", ret); + _leave(""); + } /* end afs_file_invalidatepage() */ /*****************************************************************************/ @@ -251,23 +326,30 @@ #endif */ static int afs_file_releasepage(struct page *page, gfp_t gfp_flags) { - struct cachefs_page *pageio; - _enter("{%lu},%x", page->index, gfp_flags); - if (PagePrivate(page)) { -#ifdef AFS_CACHING_SUPPORT - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - cachefs_uncache_page(vnode->cache, page); +#ifdef CONFIG_AFS_FSCACHE + wait_on_page_fs_misc(page); + fscache_uncache_page(AFS_FS_I(page->mapping->host)->cache, page); + ClearPagePrivate(page); #endif - pageio = (struct cachefs_page *) page_private(page); - set_page_private(page, 0); - ClearPagePrivate(page); + /* indicate that the page can be released */ + _leave(" = 1"); + return 1; - kfree(pageio); - } +} /* end afs_file_releasepage() */ - _leave(" = 0"); +/*****************************************************************************/ +/* + * wait for the disc cache to finish writing before permitting modification of + * our page in the page cache + */ +#ifdef CONFIG_AFS_FSCACHE +static int afs_file_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + wait_on_page_fs_misc(page); return 0; -} /* end afs_file_releasepage() */ + +} /* end afs_file_page_mkwrite() */ +#endif diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 61bc371..c88c41a 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -398,6 +398,8 @@ int afs_rxfs_fetch_file_status(struct af bp++; /* spare6 */ } + _debug("Data Version %llx\n", vnode->status.version); + /* success */ ret = 0; @@ -408,7 +410,7 @@ int afs_rxfs_fetch_file_status(struct af out_put_conn: afs_server_release_callslot(server, &callslot); out: - _leave(""); + _leave(" = %d", ret); return ret; abort: diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 4ebb30a..0a59eda 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -49,7 +49,7 @@ static int afs_inode_map_status(struct a case AFS_FTYPE_FILE: inode->i_mode = S_IFREG | vnode->status.mode; inode->i_op = &afs_file_inode_operations; - inode->i_fop = &generic_ro_fops; + inode->i_fop = &afs_file_file_operations; break; case AFS_FTYPE_DIR: inode->i_mode = S_IFDIR | vnode->status.mode; @@ -65,6 +65,11 @@ static int afs_inode_map_status(struct a return -EBADMSG; } +#ifdef CONFIG_AFS_FSCACHE + if (vnode->status.size != inode->i_size) + fscache_set_i_size(vnode->cache, vnode->status.size); +#endif + inode->i_nlink = vnode->status.nlink; inode->i_uid = vnode->status.owner; inode->i_gid = 0; @@ -101,13 +106,33 @@ static int afs_inode_fetch_status(struct struct 
afs_vnode *vnode; int ret; + _enter(""); + vnode = AFS_FS_I(inode); ret = afs_vnode_fetch_status(vnode); - if (ret == 0) + if (ret == 0) { +#ifdef CONFIG_AFS_FSCACHE + if (!vnode->cache) { + vnode->cache = + fscache_acquire_cookie(vnode->volume->cache, + &afs_vnode_cache_index_def, + vnode); + if (!vnode->cache) + printk("Negative\n"); + } +#endif ret = afs_inode_map_status(vnode); +#ifdef CONFIG_AFS_FSCACHE + if (ret < 0) { + fscache_relinquish_cookie(vnode->cache, 0); + vnode->cache = NULL; + } +#endif + } + _leave(" = %d", ret); return ret; } /* end afs_inode_fetch_status() */ @@ -122,6 +147,7 @@ static int afs_iget5_test(struct inode * return inode->i_ino == data->fid.vnode && inode->i_version == data->fid.unique; + } /* end afs_iget5_test() */ /*****************************************************************************/ @@ -179,20 +205,11 @@ inline int afs_iget(struct super_block * return ret; } -#ifdef AFS_CACHING_SUPPORT - /* set up caching before reading the status, as fetch-status reads the - * first page of symlinks to see if they're really mntpts */ - cachefs_acquire_cookie(vnode->volume->cache, - NULL, - vnode, - &vnode->cache); -#endif - /* okay... it's a new inode */ inode->i_flags |= S_NOATIME; vnode->flags |= AFS_VNODE_CHANGED; ret = afs_inode_fetch_status(inode); - if (ret<0) + if (ret < 0) goto bad_inode; /* success */ @@ -278,8 +295,8 @@ void afs_clear_inode(struct inode *inode afs_vnode_give_up_callback(vnode); -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(vnode->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(vnode->cache, 0); vnode->cache = NULL; #endif diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e88b3b6..482dbd1 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -16,15 +16,17 @@ #include #include #include #include +#include /* * debug tracing */ -#define kenter(FMT, a...) printk("==> %s("FMT")\n",__FUNCTION__ , ## a) -#define kleave(FMT, a...) printk("<== %s()"FMT"\n",__FUNCTION__ , ## a) -#define kdebug(FMT, a...) printk(FMT"\n" , ## a) -#define kproto(FMT, a...) printk("### "FMT"\n" , ## a) -#define knet(FMT, a...) printk(FMT"\n" , ## a) +#define __kdbg(FMT, a...) printk("[%05d] "FMT"\n", current->pid , ## a) +#define kenter(FMT, a...) __kdbg("==> %s("FMT")", __FUNCTION__ , ## a) +#define kleave(FMT, a...) __kdbg("<== %s()"FMT, __FUNCTION__ , ## a) +#define kdebug(FMT, a...) __kdbg(FMT , ## a) +#define kproto(FMT, a...) __kdbg("### "FMT , ## a) +#define knet(FMT, a...) __kdbg(FMT , ## a) #ifdef __KDEBUG #define _enter(FMT, a...) 
kenter(FMT , ## a) @@ -56,9 +58,6 @@ static inline void afs_discard_my_signal */ extern struct rw_semaphore afs_proc_cells_sem; extern struct list_head afs_proc_cells; -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_cache_cell_index_def; -#endif /* * dir.c @@ -71,11 +70,7 @@ extern const struct file_operations afs_ */ extern const struct address_space_operations afs_fs_aops; extern struct inode_operations afs_file_inode_operations; - -#ifdef AFS_CACHING_SUPPORT -extern int afs_cache_get_page_cookie(struct page *page, - struct cachefs_page **_page_cookie); -#endif +extern const struct file_operations afs_file_file_operations; /* * inode.c @@ -97,8 +92,8 @@ #endif /* * main.c */ -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_netfs afs_cache_netfs; +#ifdef CONFIG_AFS_FSCACHE +extern struct fscache_netfs afs_cache_netfs; #endif /* diff --git a/fs/afs/main.c b/fs/afs/main.c index 913c689..5840bb2 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -1,6 +1,6 @@ /* main.c: AFS client file system * - * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2002,5 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -14,11 +14,11 @@ #include #include #include #include +#include #include #include #include #include -#include "cache.h" #include "cell.h" #include "server.h" #include "fsclient.h" @@ -51,12 +51,11 @@ static struct rxrpc_peer_ops afs_peer_op struct list_head afs_cb_hash_tbl[AFS_CB_HASH_COUNT]; DEFINE_SPINLOCK(afs_cb_hash_lock); -#ifdef AFS_CACHING_SUPPORT -static struct cachefs_netfs_operations afs_cache_ops = { - .get_page_cookie = afs_cache_get_page_cookie, +#ifdef CONFIG_AFS_FSCACHE +static struct fscache_netfs_operations afs_cache_ops = { }; -struct cachefs_netfs afs_cache_netfs = { +struct fscache_netfs afs_cache_netfs = { .name = "afs", .version = 0, .ops = &afs_cache_ops, @@ -83,10 +82,9 @@ static int __init afs_init(void) if (ret < 0) return ret; -#ifdef AFS_CACHING_SUPPORT +#ifdef CONFIG_AFS_FSCACHE /* we want to be able to cache */ - ret = cachefs_register_netfs(&afs_cache_netfs, - &afs_cache_cell_index_def); + ret = fscache_register_netfs(&afs_cache_netfs); if (ret < 0) goto error; #endif @@ -137,8 +135,8 @@ #ifdef CONFIG_KEYS_TURNED_OFF afs_key_unregister(); error_cache: #endif -#ifdef AFS_CACHING_SUPPORT - cachefs_unregister_netfs(&afs_cache_netfs); +#ifdef CONFIG_AFS_FSCACHE + fscache_unregister_netfs(&afs_cache_netfs); error: #endif afs_cell_purge(); @@ -167,8 +165,8 @@ static void __exit afs_exit(void) #ifdef CONFIG_KEYS_TURNED_OFF afs_key_unregister(); #endif -#ifdef AFS_CACHING_SUPPORT - cachefs_unregister_netfs(&afs_cache_netfs); +#ifdef CONFIG_AFS_FSCACHE + fscache_unregister_netfs(&afs_cache_netfs); #endif afs_proc_cleanup(); diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 99785a7..2a53d51 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -78,7 +78,7 @@ int afs_mntpt_check_symlink(struct afs_v ret = -EIO; wait_on_page_locked(page); - buf = kmap(page); + buf = kmap_atomic(page, KM_USER0); if (!PageUptodate(page)) goto out_free; if (PageError(page)) @@ -101,7 +101,7 @@ int afs_mntpt_check_symlink(struct afs_v ret = 0; out_free: - kunmap(page); + kunmap_atomic(buf, KM_USER0); page_cache_release(page); out: _leave(" = %d", ret); @@ -188,9 +188,9 @@ static struct vfsmount *afs_mntpt_do_aut if (!PageUptodate(page) || PageError(page)) goto error; - buf = kmap(page); + buf = kmap_atomic(page, KM_USER0); memcpy(devname, buf, size); - 
kunmap(page); + kunmap_atomic(buf, KM_USER0); page_cache_release(page); page = NULL; @@ -269,12 +269,12 @@ static void *afs_mntpt_follow_link(struc */ static void afs_mntpt_expiry_timed_out(struct afs_timer *timer) { - kenter(""); +// kenter(""); mark_mounts_for_expiry(&afs_vfsmounts); afs_kafstimod_add_timer(&afs_mntpt_expiry_timer, afs_mntpt_expiry_timeout * HZ); - kleave(""); +// kleave(""); } /* end afs_mntpt_expiry_timed_out() */ diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 101d21b..db58488 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -177,6 +177,7 @@ int afs_proc_init(void) */ void afs_proc_cleanup(void) { + remove_proc_entry("rootcell", proc_afs); remove_proc_entry("cells", proc_afs); remove_proc_entry("fs/afs", NULL); diff --git a/fs/afs/server.c b/fs/afs/server.c index 22afaae..e94628c 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -375,7 +375,6 @@ int afs_server_request_callslot(struct a else if (list_empty(&server->fs_callq)) { /* no one waiting */ server->fs_conn_cnt[nconn]++; - spin_unlock(&server->fs_lock); } else { /* someone's waiting - dequeue them and wake them up */ @@ -393,9 +392,9 @@ int afs_server_request_callslot(struct a } pcallslot->ready = 1; wake_up_process(pcallslot->task); - spin_unlock(&server->fs_lock); } + spin_unlock(&server->fs_lock); rxrpc_put_connection(callslot->conn); callslot->conn = NULL; diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 331f730..20148bc 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -59,17 +59,21 @@ static LIST_HEAD(afs_vlocation_update_pe static struct afs_vlocation *afs_vlocation_update; /* VL currently being updated */ static DEFINE_SPINLOCK(afs_vlocation_update_lock); /* lock guarding update queue */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vlocation_cache_match(void *target, - const void *entry); -static void afs_vlocation_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_vlocation_cache_index_def = { - .name = "vldb", - .data_size = sizeof(struct afs_cache_vlocation), - .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 }, - .match = afs_vlocation_cache_match, - .update = afs_vlocation_cache_update, +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static fscache_checkaux_t afs_vlocation_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen); + +static struct fscache_cookie_def afs_vlocation_cache_index_def = { + .name = "AFS.vldb", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = afs_vlocation_cache_get_key, + .get_aux = afs_vlocation_cache_get_aux, + .check_aux = afs_vlocation_cache_check_aux, }; #endif @@ -300,13 +304,12 @@ int afs_vlocation_lookup(struct afs_cell list_add_tail(&vlocation->link, &cell->vl_list); -#ifdef AFS_CACHING_SUPPORT +#ifdef CONFIG_AFS_FSCACHE /* we want to store it in the cache, plus it might already be * encached */ - cachefs_acquire_cookie(cell->cache, - &afs_volume_cache_index_def, - vlocation, - &vlocation->cache); + vlocation->cache = fscache_acquire_cookie(cell->cache, + &afs_vlocation_cache_index_def, + vlocation); if (vlocation->valid) goto found_in_cache; @@ -340,7 +343,7 @@ #endif active: active = 1; -#ifdef AFS_CACHING_SUPPORT +#ifdef CONFIG_AFS_FSCACHE found_in_cache: #endif /* try to look up a cached volume in the cell VL databases by ID */ @@ -422,9 +425,9 @@ #endif 
afs_kafstimod_add_timer(&vlocation->upd_timer, 10 * HZ); -#ifdef AFS_CACHING_SUPPORT +#ifdef CONFIG_AFS_FSCACHE /* update volume entry in local cache */ - cachefs_update_cookie(vlocation->cache); + fscache_update_cookie(vlocation->cache); #endif *_vlocation = vlocation; @@ -438,8 +441,8 @@ #endif } else { list_del(&vlocation->link); -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(vlocation->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(vlocation->cache, 0); #endif afs_put_cell(vlocation->cell); kfree(vlocation); @@ -536,8 +539,8 @@ void afs_vlocation_do_timeout(struct afs } /* we can now destroy it properly */ -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(vlocation->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(vlocation->cache, 0); #endif afs_put_cell(cell); @@ -888,65 +891,103 @@ static void afs_vlocation_update_discard /*****************************************************************************/ /* - * match a VLDB record stored in the cache - * - may also load target from entry + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vlocation_cache_match(void *target, - const void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct afs_cache_vlocation *vldb = entry; - struct afs_vlocation *vlocation = target; + const struct afs_vlocation *vlocation = cookie_netfs_data; + uint16_t klen; - _enter("{%s},{%s}", vlocation->vldb.name, vldb->name); + _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); - if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0 - ) { - if (!vlocation->valid || - vlocation->vldb.rtime == vldb->rtime - ) { - vlocation->vldb = *vldb; - vlocation->valid = 1; - _leave(" = SUCCESS [c->m]"); - return CACHEFS_MATCH_SUCCESS; - } - /* need to update cache if cached info differs */ - else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) { - /* delete if VIDs for this name differ */ - if (memcmp(&vlocation->vldb.vid, - &vldb->vid, - sizeof(vldb->vid)) != 0) { - _leave(" = DELETE"); - return CACHEFS_MATCH_SUCCESS_DELETE; - } + klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name)); + if (klen > bufmax) + return 0; - _leave(" = UPDATE"); - return CACHEFS_MATCH_SUCCESS_UPDATE; - } - else { - _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; - } - } + memcpy(buffer, vlocation->vldb.name, klen); + + _leave(" = %u", klen); + return klen; - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; -} /* end afs_vlocation_cache_match() */ +} /* end afs_vlocation_cache_get_key() */ #endif /*****************************************************************************/ /* - * update a VLDB record stored in the cache + * provide new auxilliary cache data */ -#ifdef AFS_CACHING_SUPPORT -static void afs_vlocation_cache_update(void *source, void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - struct afs_cache_vlocation *vldb = entry; - struct afs_vlocation *vlocation = source; + const struct afs_vlocation *vlocation = cookie_netfs_data; + uint16_t dlen; - _enter(""); + _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); + + dlen = sizeof(struct afs_cache_vlocation); + dlen -= offsetof(struct afs_cache_vlocation, nservers); + if (dlen > bufmax) + return 0; + + memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen); + + _leave(" = 
%u", dlen); + return dlen; + +} /* end afs_vlocation_cache_get_aux() */ +#endif + +/*****************************************************************************/ +/* + * check that the auxilliary data indicates that the entry is still valid + */ +#ifdef CONFIG_AFS_FSCACHE +static fscache_checkaux_t afs_vlocation_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen) +{ + const struct afs_cache_vlocation *cvldb; + struct afs_vlocation *vlocation = cookie_netfs_data; + uint16_t dlen; + + _enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen); + + /* check the size of the data is what we're expecting */ + dlen = sizeof(struct afs_cache_vlocation); + dlen -= offsetof(struct afs_cache_vlocation, nservers); + if (dlen != buflen) + return FSCACHE_CHECKAUX_OBSOLETE; + + cvldb = container_of(buffer, struct afs_cache_vlocation, nservers); + + /* if what's on disk is more valid than what's in memory, then use the + * VL record from the cache */ + if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) { + memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen); + vlocation->valid = 1; + _leave(" = SUCCESS [c->m]"); + return FSCACHE_CHECKAUX_OKAY; + } + + /* need to update the cache if the cached info differs */ + if (memcmp(&vlocation->vldb, buffer, dlen) != 0) { + /* delete if the volume IDs for this name differ */ + if (memcmp(&vlocation->vldb.vid, &cvldb->vid, + sizeof(cvldb->vid)) != 0 + ) { + _leave(" = OBSOLETE"); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + _leave(" = UPDATE"); + return FSCACHE_CHECKAUX_NEEDS_UPDATE; + } - *vldb = vlocation->vldb; + _leave(" = OKAY"); + return FSCACHE_CHECKAUX_OKAY; -} /* end afs_vlocation_cache_update() */ +} /* end afs_vlocation_cache_check_aux() */ #endif diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index cf62da5..b6cba1e 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c @@ -29,17 +29,30 @@ struct afs_timer_ops afs_vnode_cb_timed_ .timed_out = afs_vnode_cb_timed_out, }; -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vnode_cache_match(void *target, - const void *entry); -static void afs_vnode_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_vnode_cache_index_def = { - .name = "vnode", - .data_size = sizeof(struct afs_cache_vnode), - .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 4 }, - .match = afs_vnode_cache_match, - .update = afs_vnode_cache_update, +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, + uint64_t *size); +static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static fscache_checkaux_t afs_vnode_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen); +static void afs_vnode_cache_mark_pages_cached(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec); +static void afs_vnode_cache_now_uncached(void *cookie_netfs_data); + +struct fscache_cookie_def afs_vnode_cache_index_def = { + .name = "AFS.vnode", + .type = FSCACHE_COOKIE_TYPE_DATAFILE, + .get_key = afs_vnode_cache_get_key, + .get_attr = afs_vnode_cache_get_attr, + .get_aux = afs_vnode_cache_get_aux, + .check_aux = afs_vnode_cache_check_aux, + .mark_pages_cached = afs_vnode_cache_mark_pages_cached, + .now_uncached = afs_vnode_cache_now_uncached, }; #endif @@ -188,6 +201,8 @@ int afs_vnode_fetch_status(struct afs_vn if (vnode->update_cnt > 0) { /* someone else started 
a fetch */ + _debug("conflict"); + set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&vnode->update_waitq, &myself); @@ -219,6 +234,7 @@ int afs_vnode_fetch_status(struct afs_vn spin_unlock(&vnode->lock); set_current_state(TASK_RUNNING); + _leave(" [conflicted, %d", !!(vnode->flags & AFS_VNODE_DELETED)); return vnode->flags & AFS_VNODE_DELETED ? -ENOENT : 0; } @@ -341,54 +357,198 @@ int afs_vnode_give_up_callback(struct af /*****************************************************************************/ /* - * match a vnode record stored in the cache + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vnode_cache_match(void *target, - const void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct afs_cache_vnode *cvnode = entry; - struct afs_vnode *vnode = target; + const struct afs_vnode *vnode = cookie_netfs_data; + uint16_t klen; - _enter("{%x,%x,%Lx},{%x,%x,%Lx}", - vnode->fid.vnode, - vnode->fid.unique, - vnode->status.version, - cvnode->vnode_id, - cvnode->vnode_unique, - cvnode->data_version); - - if (vnode->fid.vnode != cvnode->vnode_id) { - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; + _enter("{%x,%x,%Lx},%p,%u", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version, + buffer, bufmax); + + klen = sizeof(vnode->fid.vnode); + if (klen > bufmax) + return 0; + + memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode)); + + _leave(" = %u", klen); + return klen; + +} /* end afs_vnode_cache_get_key() */ +#endif + +/*****************************************************************************/ +/* + * provide an updated file attributes + */ +#ifdef CONFIG_AFS_FSCACHE +static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, + uint64_t *size) +{ + const struct afs_vnode *vnode = cookie_netfs_data; + + _enter("{%x,%x,%Lx},", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version); + + *size = i_size_read((struct inode *) &vnode->vfs_inode); + +} /* end afs_vnode_cache_get_attr() */ +#endif + +/*****************************************************************************/ +/* + * provide new auxilliary cache data + */ +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct afs_vnode *vnode = cookie_netfs_data; + uint16_t dlen; + + _enter("{%x,%x,%Lx},%p,%u", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version, + buffer, bufmax); + + dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.version); + if (dlen > bufmax) + return 0; + + memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique)); + buffer += sizeof(vnode->fid.unique); + memcpy(buffer, &vnode->status.version, sizeof(vnode->status.version)); + + _leave(" = %u", dlen); + return dlen; + +} /* end afs_vnode_cache_get_aux() */ +#endif + +/*****************************************************************************/ +/* + * check that the auxilliary data indicates that the entry is still valid + */ +#ifdef CONFIG_AFS_FSCACHE +static fscache_checkaux_t afs_vnode_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen) +{ + struct afs_vnode *vnode = cookie_netfs_data; + uint16_t dlen; + + _enter("{%x,%x,%Lx},%p,%u", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version, + buffer, buflen); + + /* check the size of the data is what we're expecting */ + dlen = sizeof(vnode->fid.unique) + 
sizeof(vnode->status.version); + if (dlen != buflen) { + _leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen); + return FSCACHE_CHECKAUX_OBSOLETE; } - if (vnode->fid.unique != cvnode->vnode_unique || - vnode->status.version != cvnode->data_version) { - _leave(" = DELETE"); - return CACHEFS_MATCH_SUCCESS_DELETE; + if (memcmp(buffer, + &vnode->fid.unique, + sizeof(vnode->fid.unique) + ) != 0 + ) { + unsigned unique; + + memcpy(&unique, buffer, sizeof(unique)); + + _leave(" = OBSOLETE [uniq %x != %x]", + unique, vnode->fid.unique); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + if (memcmp(buffer + sizeof(vnode->fid.unique), + &vnode->status.version, + sizeof(vnode->status.version) + ) != 0 + ) { + afs_dataversion_t version; + + memcpy(&version, buffer + sizeof(vnode->fid.unique), + sizeof(version)); + + _leave(" = OBSOLETE [vers %llx != %llx]", + version, vnode->status.version); + return FSCACHE_CHECKAUX_OBSOLETE; } _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; -} /* end afs_vnode_cache_match() */ + return FSCACHE_CHECKAUX_OKAY; + +} /* end afs_vnode_cache_check_aux() */ #endif /*****************************************************************************/ /* - * update a vnode record stored in the cache + * indication of pages that now have cache metadata retained + * - this function should mark the specified pages as now being cached */ -#ifdef AFS_CACHING_SUPPORT -static void afs_vnode_cache_update(void *source, void *entry) +#ifdef CONFIG_AFS_FSCACHE +static void afs_vnode_cache_mark_pages_cached(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec) { - struct afs_cache_vnode *cvnode = entry; - struct afs_vnode *vnode = source; + unsigned long loop; - _enter(""); + for (loop = 0; loop < cached_pvec->nr; loop++) { + struct page *page = cached_pvec->pages[loop]; - cvnode->vnode_id = vnode->fid.vnode; - cvnode->vnode_unique = vnode->fid.unique; - cvnode->data_version = vnode->status.version; + _debug("- mark %p{%lx}", page, page->index); -} /* end afs_vnode_cache_update() */ + SetPagePrivate(page); + } + +} /* end afs_vnode_cache_mark_pages_cached() */ #endif + +/*****************************************************************************/ +/* + * indication the cookie is no longer uncached + * - this function is called when the backing store currently caching a cookie + * is removed + * - the netfs should use this to clean up any markers indicating cached pages + * - this is mandatory for any object that may have data + */ +static void afs_vnode_cache_now_uncached(void *cookie_netfs_data) +{ + struct afs_vnode *vnode = cookie_netfs_data; + struct pagevec pvec; + pgoff_t first; + int loop, nr_pages; + + _enter("{%x,%x,%Lx}", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version); + + pagevec_init(&pvec, 0); + first = 0; + + for (;;) { + /* grab a bunch of pages to clean */ + nr_pages = pagevec_lookup(&pvec, vnode->vfs_inode.i_mapping, + first, + PAGEVEC_SIZE - pagevec_count(&pvec)); + if (!nr_pages) + break; + + for (loop = 0; loop < nr_pages; loop++) + ClearPagePrivate(pvec.pages[loop]); + + first = pvec.pages[nr_pages - 1]->index + 1; + + pvec.nr = nr_pages; + pagevec_release(&pvec); + cond_resched(); + } + + _leave(""); + +} /* end afs_vnode_cache_now_uncached() */ diff --git a/fs/afs/vnode.h b/fs/afs/vnode.h index b86a971..3f0602d 100644 --- a/fs/afs/vnode.h +++ b/fs/afs/vnode.h @@ -13,9 +13,9 @@ #ifndef _LINUX_AFS_VNODE_H #define _LINUX_AFS_VNODE_H #include +#include #include "server.h" #include "kafstimod.h" -#include "cache.h" #ifdef 
__KERNEL__ @@ -32,8 +32,8 @@ struct afs_cache_vnode afs_dataversion_t data_version; /* data version */ }; -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_vnode_cache_index_def; +#ifdef CONFIG_AFS_FSCACHE +extern struct fscache_cookie_def afs_vnode_cache_index_def; #endif /*****************************************************************************/ @@ -47,8 +47,8 @@ struct afs_vnode struct afs_volume *volume; /* volume on which vnode resides */ struct afs_fid fid; /* the file identifier for this inode */ struct afs_file_status status; /* AFS status info for this file */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif wait_queue_head_t update_waitq; /* status fetch waitqueue */ diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 0ff4b86..0bd5578 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -15,10 +15,10 @@ #include #include #include #include +#include #include "volume.h" #include "vnode.h" #include "cell.h" -#include "cache.h" #include "cmservice.h" #include "fsclient.h" #include "vlclient.h" @@ -28,18 +28,14 @@ #ifdef __KDEBUG static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; #endif -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_volume_cache_match(void *target, - const void *entry); -static void afs_volume_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_volume_cache_index_def = { - .name = "volume", - .data_size = sizeof(struct afs_cache_vhash), - .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 1 }, - .keys[1] = { CACHEFS_INDEX_KEYS_BIN, 1 }, - .match = afs_volume_cache_match, - .update = afs_volume_cache_update, +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); + +static struct fscache_cookie_def afs_volume_cache_index_def = { + .name = "AFS.volume", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = afs_volume_cache_get_key, }; #endif @@ -214,11 +210,10 @@ int afs_volume_lookup(const char *name, } /* attach the cache and volume location */ -#ifdef AFS_CACHING_SUPPORT - cachefs_acquire_cookie(vlocation->cache, - &afs_vnode_cache_index_def, - volume, - &volume->cache); +#ifdef CONFIG_AFS_FSCACHE + volume->cache = fscache_acquire_cookie(vlocation->cache, + &afs_volume_cache_index_def, + volume); #endif afs_get_vlocation(vlocation); @@ -286,8 +281,8 @@ void afs_put_volume(struct afs_volume *v up_write(&vlocation->cell->vl_sem); /* finish cleaning up the volume */ -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(volume->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(volume->cache, 0); #endif afs_put_vlocation(vlocation); @@ -481,40 +476,25 @@ int afs_volume_release_fileserver(struct /*****************************************************************************/ /* - * match a volume hash record stored in the cache + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_volume_cache_match(void *target, - const void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct afs_cache_vhash *vhash = entry; - struct afs_volume *volume = target; - - _enter("{%u},{%u}", volume->type, vhash->vtype); + const struct afs_volume *volume = cookie_netfs_data; + uint16_t klen; - if (volume->type == vhash->vtype) { - _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; - } - - 
_leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; -} /* end afs_volume_cache_match() */ -#endif + _enter("{%u},%p,%u", volume->type, buffer, bufmax); -/*****************************************************************************/ -/* - * update a volume hash record stored in the cache - */ -#ifdef AFS_CACHING_SUPPORT -static void afs_volume_cache_update(void *source, void *entry) -{ - struct afs_cache_vhash *vhash = entry; - struct afs_volume *volume = source; + klen = sizeof(volume->type); + if (klen > bufmax) + return 0; - _enter(""); + memcpy(buffer, &volume->type, sizeof(volume->type)); - vhash->vtype = volume->type; + _leave(" = %u", klen); + return klen; -} /* end afs_volume_cache_update() */ +} /* end afs_volume_cache_get_key() */ #endif diff --git a/fs/afs/volume.h b/fs/afs/volume.h index bfdcf19..fc9895a 100644 --- a/fs/afs/volume.h +++ b/fs/afs/volume.h @@ -12,11 +12,11 @@ #ifndef _LINUX_AFS_VOLUME_H #define _LINUX_AFS_VOLUME_H +#include #include "types.h" #include "fsclient.h" #include "kafstimod.h" #include "kafsasyncd.h" -#include "cache.h" typedef enum { AFS_VLUPD_SLEEP, /* sleeping waiting for update timer to fire */ @@ -45,24 +45,6 @@ #define AFS_VOL_VTM_BAK 0x04 /* backup v time_t rtime; /* last retrieval time */ }; -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_vlocation_cache_index_def; -#endif - -/*****************************************************************************/ -/* - * volume -> vnode hash table entry - */ -struct afs_cache_vhash -{ - afs_voltype_t vtype; /* which volume variation */ - uint8_t hash_bucket; /* which hash bucket this represents */ -} __attribute__((packed)); - -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_volume_cache_index_def; -#endif - /*****************************************************************************/ /* * AFS volume location record @@ -73,8 +55,8 @@ struct afs_vlocation struct list_head link; /* link in cell volume location list */ struct afs_timer timeout; /* decaching timer */ struct afs_cell *cell; /* cell to which volume belongs */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif struct afs_cache_vlocation vldb; /* volume information DB record */ struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ @@ -109,8 +91,8 @@ struct afs_volume atomic_t usage; struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */ struct afs_vlocation *vlocation; /* volume location */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif afs_volid_t vid; /* volume ID */ afs_voltype_t type; /* type of volume */ diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index d6603d0..47e38f3 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -96,7 +96,6 @@ #define AUTOFS_TYPE_OFFSET 0x0004 struct autofs_sb_info { u32 magic; - struct dentry *root; int pipefd; struct file *pipe; pid_t oz_pgrp; @@ -231,4 +230,4 @@ out: } void autofs4_dentry_release(struct dentry *); - +extern void autofs4_kill_sb(struct super_block *); diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index 5d91933..723a1c5 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c @@ -24,7 +24,7 @@ static struct file_system_type autofs_fs .owner = THIS_MODULE, .name = "autofs", .get_sb = autofs_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = autofs4_kill_sb, }; 
static int __init init_autofs4_fs(void) diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index fde78b1..1bf68c5 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -95,7 +95,7 @@ void autofs4_free_ino(struct autofs_info */ static void autofs4_force_release(struct autofs_sb_info *sbi) { - struct dentry *this_parent = sbi->root; + struct dentry *this_parent = sbi->sb->s_root; struct list_head *next; spin_lock(&dcache_lock); @@ -126,7 +126,7 @@ resume: spin_lock(&dcache_lock); } - if (this_parent != sbi->root) { + if (this_parent != sbi->sb->s_root) { struct dentry *dentry = this_parent; next = this_parent->d_u.d_child.next; @@ -139,15 +139,9 @@ resume: goto resume; } spin_unlock(&dcache_lock); - - dput(sbi->root); - sbi->root = NULL; - shrink_dcache_sb(sbi->sb); - - return; } -static void autofs4_put_super(struct super_block *sb) +void autofs4_kill_sb(struct super_block *sb) { struct autofs_sb_info *sbi = autofs4_sbi(sb); @@ -162,6 +156,7 @@ static void autofs4_put_super(struct sup kfree(sbi); DPRINTK("shutting down"); + kill_anon_super(sb); } static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) @@ -188,7 +183,6 @@ static int autofs4_show_options(struct s } static struct super_operations autofs4_sops = { - .put_super = autofs4_put_super, .statfs = simple_statfs, .show_options = autofs4_show_options, }; @@ -314,7 +308,6 @@ int autofs4_fill_super(struct super_bloc s->s_fs_info = sbi; sbi->magic = AUTOFS_SBI_MAGIC; - sbi->root = NULL; sbi->pipefd = -1; sbi->catatonic = 0; sbi->exp_timeout = 0; @@ -396,13 +389,6 @@ int autofs4_fill_super(struct super_bloc sbi->pipefd = pipefd; /* - * Take a reference to the root dentry so we get a chance to - * clean up the dentry tree on umount. - * See autofs4_force_release. - */ - sbi->root = dget(root); - - /* * Success! Install the root dentry now to indicate completion. */ s->s_root = root; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index ce103e7..c0a6c8d 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -45,7 +45,6 @@ void autofs4_catatonic_mode(struct autof fput(sbi->pipe); /* Close the pipe */ sbi->pipe = NULL; } - shrink_dcache_sb(sbi->sb); } static int autofs4_write(struct file *file, const void *addr, int bytes) diff --git a/fs/buffer.c b/fs/buffer.c index 3660dcb..31a01da 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -184,6 +184,8 @@ int fsync_super(struct super_block *sb) return sync_blockdev(sb->s_bdev); } +EXPORT_SYMBOL(fsync_super); + /* * Write out and wait upon all dirty data associated with this * device. Filesystem data as well as the underlying block diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile new file mode 100644 index 0000000..c1522d9 --- /dev/null +++ b/fs/cachefiles/Makefile @@ -0,0 +1,18 @@ +# +# Makefile for caching in a mounted filesystem +# + +cachefiles-objs := \ + cf-bind.o \ + cf-interface.o \ + cf-key.o \ + cf-main.o \ + cf-namei.o \ + cf-proc.o \ + cf-xattr.o + +ifeq ($(CONFIG_SYSCTL),y) +cachefiles-objs += cf-sysctl.o +endif + +obj-$(CONFIG_CACHEFILES) := cachefiles.o diff --git a/fs/cachefiles/cf-bind.c b/fs/cachefiles/cf-bind.c new file mode 100644 index 0000000..0c14c37 --- /dev/null +++ b/fs/cachefiles/cf-bind.c @@ -0,0 +1,283 @@ +/* cf-bind.c: bind and unbind a cache from the filesystem backing it + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +static int cachefiles_proc_add_cache(struct cachefiles_cache *cache, + struct vfsmount *mnt); + +/*****************************************************************************/ +/* + * bind a directory as a cache + */ +int cachefiles_proc_bind(struct cachefiles_cache *cache, char *args) +{ + _enter("{%u,%u,%u},%s", + cache->brun_percent, + cache->bcull_percent, + cache->bstop_percent, + args); + + /* start by checking things over */ + ASSERT(cache->bstop_percent >= 0 && + cache->bstop_percent < cache->bcull_percent && + cache->bcull_percent < cache->brun_percent && + cache->brun_percent < 100); + + if (*args) { + kerror("'bind' command doesn't take an argument"); + return -EINVAL; + } + + if (!cache->rootdirname) { + kerror("No cache directory specified"); + return -EINVAL; + } + + /* don't permit already bound caches to be re-bound */ + if (test_bit(CACHEFILES_READY, &cache->flags)) { + kerror("Cache already bound"); + return -EBUSY; + } + + /* make sure we have copies of the tag and dirname strings */ + if (!cache->tag) { + /* the tag string is released by the fops->release() + * function, so we don't release it on error here */ + cache->tag = kstrdup("CacheFiles", GFP_KERNEL); + if (!cache->tag) + return -ENOMEM; + } + + /* add the cache */ + return cachefiles_proc_add_cache(cache, NULL); + +} /* end cachefiles_proc_bind() */ + +/*****************************************************************************/ +/* + * add a cache + */ +static int cachefiles_proc_add_cache(struct cachefiles_cache *cache, + struct vfsmount *mnt) +{ + struct cachefiles_object *fsdef; + struct nameidata nd; + struct kstatfs stats; + struct dentry *graveyard, *cachedir, *root; + int ret; + + _enter(""); + + /* allocate the root index object */ + ret = -ENOMEM; + + fsdef = kmem_cache_alloc(cachefiles_object_jar, SLAB_KERNEL); + if (!fsdef) + goto error_root_object; + + atomic_set(&fsdef->usage, 1); + atomic_set(&fsdef->fscache_usage, 1); + fsdef->type = FSCACHE_COOKIE_TYPE_INDEX; + + _debug("- fsdef %p", fsdef); + + /* look up the directory at the root of the cache */ + memset(&nd, 0, sizeof(nd)); + + ret = path_lookup(cache->rootdirname, LOOKUP_DIRECTORY, &nd); + if (ret < 0) + goto error_open_root; + + /* bind to the special mountpoint we've prepared */ + if (mnt) { + atomic_inc(&nd.mnt->mnt_sb->s_active); + mnt->mnt_sb = nd.mnt->mnt_sb; + mnt->mnt_flags = nd.mnt->mnt_flags; + mnt->mnt_flags |= MNT_NOSUID | MNT_NOEXEC | MNT_NODEV; + mnt->mnt_root = dget(nd.dentry); + mnt->mnt_mountpoint = mnt->mnt_root; + + /* copy the name, but ignore kstrdup() failing ENOMEM - we'll + * just end up with an devicenameless mountpoint */ + mnt->mnt_devname = kstrdup(nd.mnt->mnt_devname, GFP_KERNEL); + path_release(&nd); + + cache->mnt = mntget(mnt); + root = dget(mnt->mnt_root); + } + else { + cache->mnt = nd.mnt; + root = nd.dentry; + + nd.mnt = NULL; + nd.dentry = NULL; + path_release(&nd); + } + + /* check parameters */ + ret = -EOPNOTSUPP; + if (!root->d_inode || + !root->d_inode->i_op || + !root->d_inode->i_op->lookup || + !root->d_inode->i_op->mkdir || + 
!root->d_inode->i_op->setxattr || + !root->d_inode->i_op->getxattr || + !root->d_sb || + !root->d_sb->s_op || + !root->d_sb->s_op->statfs || + !root->d_sb->s_op->sync_fs) + goto error_unsupported; + + ret = -EROFS; + if (root->d_sb->s_flags & MS_RDONLY) + goto error_unsupported; + + /* get the cache size and blocksize */ + ret = root->d_sb->s_op->statfs(root, &stats); + if (ret < 0) + goto error_unsupported; + + ret = -ERANGE; + if (stats.f_bsize <= 0) + goto error_unsupported; + + ret = -EOPNOTSUPP; + if (stats.f_bsize > PAGE_SIZE) + goto error_unsupported; + + cache->bsize = stats.f_bsize; + cache->bshift = 0; + if (stats.f_bsize < PAGE_SIZE) + cache->bshift = PAGE_SHIFT - long_log2(stats.f_bsize); + + _debug("blksize %u (shift %u)", + cache->bsize, cache->bshift); + + _debug("size %llu, avail %llu", stats.f_blocks, stats.f_bavail); + + /* set up caching limits */ + stats.f_blocks >>= cache->bshift; + do_div(stats.f_blocks, 100); + cache->bstop = stats.f_blocks * cache->bstop_percent; + cache->bcull = stats.f_blocks * cache->bcull_percent; + cache->brun = stats.f_blocks * cache->brun_percent; + + _debug("limits {%llu,%llu,%llu}", + cache->brun, + cache->bcull, + cache->bstop); + + /* get the cache directory and check its type */ + cachedir = cachefiles_get_directory(cache, root, "cache"); + if (IS_ERR(cachedir)) { + ret = PTR_ERR(cachedir); + goto error_unsupported; + } + + fsdef->dentry = cachedir; + + ret = cachefiles_check_object_type(fsdef); + if (ret < 0) + goto error_unsupported; + + /* get the graveyard directory */ + graveyard = cachefiles_get_directory(cache, root, "graveyard"); + if (IS_ERR(graveyard)) { + ret = PTR_ERR(graveyard); + goto error_unsupported; + } + + cache->graveyard = graveyard; + + /* publish the cache */ + fscache_init_cache(&cache->cache, + &cachefiles_cache_ops, + "%02x:%02x", + MAJOR(fsdef->dentry->d_sb->s_dev), + MINOR(fsdef->dentry->d_sb->s_dev) + ); + + ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag); + if (ret < 0) + goto error_add_cache; + + /* done */ + set_bit(CACHEFILES_READY, &cache->flags); + dput(root); + + printk(KERN_INFO "CacheFiles:" + " File cache on %s registered\n", + cache->cache.identifier); + + /* check how much space the cache has */ + cachefiles_has_space(cache, 0); + + return 0; + +error_add_cache: + dput(cache->graveyard); + cache->graveyard = NULL; +error_unsupported: + mntput(cache->mnt); + cache->mnt = NULL; + dput(fsdef->dentry); + fsdef->dentry = NULL; + dput(root); +error_open_root: + kmem_cache_free(cachefiles_object_jar, fsdef); +error_root_object: + kerror("Failed to register: %d", ret); + return ret; + +} /* end cachefiles_proc_add_cache() */ + +/*****************************************************************************/ +/* + * unbind a cache on fd release + */ +void cachefiles_proc_unbind(struct cachefiles_cache *cache) +{ + _enter(""); + + if (test_bit(CACHEFILES_READY, &cache->flags)) { + printk(KERN_INFO "CacheFiles:" + " File cache on %s unregistering\n", + cache->cache.identifier); + + fscache_withdraw_cache(&cache->cache); + } + + if (cache->cache.fsdef) + cache->cache.ops->put_object(cache->cache.fsdef); + + dput(cache->graveyard); + mntput(cache->mnt); + + kfree(cache->rootdirname); + kfree(cache->tag); + + _leave(""); + +} /* end cachefiles_proc_unbind() */ diff --git a/fs/cachefiles/cf-interface.c b/fs/cachefiles/cf-interface.c new file mode 100644 index 0000000..32ad844 --- /dev/null +++ b/fs/cachefiles/cf-interface.c @@ -0,0 +1,1315 @@ +/* cf-interface.c: CacheFiles to FS-Cache 
interface + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +#define list_to_page(head) (list_entry((head)->prev, struct page, lru)) +#define log2(n) ffz(~(n)) + +/*****************************************************************************/ +/* + * look up the nominated node in this cache, creating it if necessary + */ +static struct fscache_object *cachefiles_lookup_object( + struct fscache_cache *_cache, + struct fscache_object *_parent, + struct fscache_cookie *cookie) +{ + struct cachefiles_object *parent, *object; + struct cachefiles_cache *cache; + struct cachefiles_xattr *auxdata; + unsigned keylen, auxlen; + void *buffer; + char *key; + int ret; + + ASSERT(_parent); + + cache = container_of(_cache, struct cachefiles_cache, cache); + parent = container_of(_parent, struct cachefiles_object, fscache); + + _enter("{%s},%p,%p", cache->cache.identifier, parent, cookie); + + /* create a new object record and a temporary leaf image */ + object = kmem_cache_alloc(cachefiles_object_jar, SLAB_KERNEL); + if (!object) + goto nomem_object; + + atomic_set(&object->usage, 1); + atomic_set(&object->fscache_usage, 1); + + fscache_object_init(&object->fscache); + object->fscache.cookie = cookie; + object->fscache.cache = parent->fscache.cache; + + object->type = cookie->def->type; + + /* get hold of the raw key + * - stick the length on the front and leave space on the back for the + * encoder + */ + buffer = kmalloc((2 + 512) + 3, GFP_KERNEL); + if (!buffer) + goto nomem_buffer; + + keylen = cookie->def->get_key(cookie->netfs_data, buffer + 2, 512); + ASSERTCMP(keylen, <, 512); + + *(uint16_t *)buffer = keylen; + ((char *)buffer)[keylen + 2] = 0; + ((char *)buffer)[keylen + 3] = 0; + ((char *)buffer)[keylen + 4] = 0; + + /* turn the raw key into something that can work with as a filename */ + key = cachefiles_cook_key(buffer, keylen + 2, object->type); + if (!key) + goto nomem_key; + + /* get hold of the auxiliary data and prepend the object type */ + auxdata = buffer; + auxlen = 0; + if (cookie->def->get_aux) { + auxlen = cookie->def->get_aux(cookie->netfs_data, + auxdata->data, 511); + ASSERTCMP(auxlen, <, 511); + } + + auxdata->len = auxlen + 1; + auxdata->type = cookie->def->type; + + /* look up the key, creating any missing bits */ + ret = cachefiles_walk_to_object(parent, object, key, auxdata); + if (ret < 0) + goto lookup_failed; + + kfree(buffer); + kfree(key); + _leave(" = %p", &object->fscache); + return &object->fscache; + +lookup_failed: + kmem_cache_free(cachefiles_object_jar, object); + kfree(buffer); + kfree(key); + _leave(" = %d", ret); + return ERR_PTR(ret); + +nomem_key: + kfree(buffer); +nomem_buffer: + kmem_cache_free(cachefiles_object_jar, object); +nomem_object: + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); + +} /* end cachefiles_lookup_object() */ + +/*****************************************************************************/ +/* + * increment the usage count on an inode object (may fail if unmounting) + */ +static struct fscache_object *cachefiles_grab_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + + _enter("%p", 
_object); + + object = container_of(_object, struct cachefiles_object, fscache); + +#ifdef CACHEFILES_DEBUG_SLAB + ASSERT((atomic_read(&object->fscache_usage) & 0xffff0000) != 0x6b6b0000); +#endif + + atomic_inc(&object->fscache_usage); + return &object->fscache; + +} /* end cachefiles_grab_object() */ + +/*****************************************************************************/ +/* + * lock the semaphore on an object object + */ +static void cachefiles_lock_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + + _enter("%p", _object); + + object = container_of(_object, struct cachefiles_object, fscache); + +#ifdef CACHEFILES_DEBUG_SLAB + ASSERT((atomic_read(&object->fscache_usage) & 0xffff0000) != 0x6b6b0000); +#endif + + down_write(&object->sem); + +} /* end cachefiles_lock_object() */ + +/*****************************************************************************/ +/* + * unlock the semaphore on an object object + */ +static void cachefiles_unlock_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + + _enter("%p", _object); + + object = container_of(_object, struct cachefiles_object, fscache); + up_write(&object->sem); + +} /* end cachefiles_unlock_object() */ + +/*****************************************************************************/ +/* + * update the auxilliary data for an object object on disk + */ +static void cachefiles_update_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + + _enter("%p", _object); + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, struct cachefiles_cache, cache); + + //cachefiles_tree_update_object(super, object); + +} /* end cachefiles_update_object() */ + +/*****************************************************************************/ +/* + * dispose of a reference to an object object + */ +static void cachefiles_put_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + int ret; + + ASSERT(_object); + + object = container_of(_object, struct cachefiles_object, fscache); + _enter("%p{%d}", object, atomic_read(&object->usage)); + + ASSERT(object); + + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + +#ifdef CACHEFILES_DEBUG_SLAB + ASSERT((atomic_read(&object->fscache_usage) & 0xffff0000) != 0x6b6b0000); +#endif + + if (!atomic_dec_and_test(&object->fscache_usage)) + return; + + _debug("- kill object %p", object); + + /* delete retired objects */ + if (test_bit(FSCACHE_OBJECT_RECYCLING, &object->fscache.flags) && + _object != cache->cache.fsdef + ) { + _debug("- retire object %p", object); + cachefiles_delete_object(cache, object); + } + + /* close the filesystem stuff attached to the object */ + if (object->backer) { + if (object->backer->f_op && + object->backer->f_op->flush + ) { + ret = object->backer->f_op->flush(object->backer, + NULL); + if (ret < 0) + kerror("Backing file flush returned error %d", + ret); + } + fput(object->backer); + object->backer = NULL; + } + + /* note that an object is now inactive */ + write_lock(&cache->active_lock); + rb_erase(&object->active_node, &cache->active_nodes); + write_unlock(&cache->active_lock); + + dput(object->dentry); + object->dentry = NULL; + + /* then dispose of the object */ + kmem_cache_free(cachefiles_object_jar, object); + + _leave(""); + +} /* end cachefiles_put_object() */ + 
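[Editorial aside, not part of the patch: the object operations above are reference-counted and lock-protected. The fragment below is a minimal sketch of how the FS-Cache core might drive them, pairing grab_object() with put_object() and bracketing update_object() with lock_object()/unlock_object(). The "ops" field name on struct fscache_cache is an assumption taken from fscache-cache.h conventions and is not shown in this file.]

	/*
	 * Illustrative sketch only (not part of this patch): roughly how the
	 * FS-Cache core is expected to pair the object operations defined
	 * above.  The cache->ops field name is an assumption; it is declared
	 * in fscache-cache.h, not in this file.
	 */
	static void example_update_cached_object(struct fscache_cache *cache,
						 struct fscache_object *object)
	{
		/* take an extra reference so the object can't vanish under us */
		object = cache->ops->grab_object(object);

		/* exclude other modifiers while the auxiliary data is rewritten */
		cache->ops->lock_object(object);
		cache->ops->update_object(object);
		cache->ops->unlock_object(object);

		/* drop the reference; the final put may retire the object */
		cache->ops->put_object(object);
	}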
+/*****************************************************************************/ +/* + * sync a cache + */ +static void cachefiles_sync_cache(struct fscache_cache *_cache) +{ + struct cachefiles_cache *cache; + int ret; + + _enter("%p", _cache); + + cache = container_of(_cache, struct cachefiles_cache, cache); + + /* make sure all pages pinned by operations on behalf of the netfs are + * written to disc */ + ret = fsync_super(cache->mnt->mnt_sb); + if (ret == -EIO) + cachefiles_io_error(cache, + "Attempt to sync backing fs superblock" + " returned error %d", + ret); + +} /* end cachefiles_sync_cache() */ + +/*****************************************************************************/ +/* + * set the data size on an object + */ +static int cachefiles_set_i_size(struct fscache_object *_object, loff_t i_size) +{ + struct cachefiles_object *object; + struct iattr newattrs; + int ret; + + _enter("%p,%llu", _object, i_size); + + object = container_of(_object, struct cachefiles_object, fscache); + + if (i_size == object->i_size) + return 0; + + if (!object->backer) + return -ENOBUFS; + + ASSERT(S_ISREG(object->backer->f_dentry->d_inode->i_mode)); + + newattrs.ia_size = i_size; + newattrs.ia_file = object->backer; + newattrs.ia_valid = ATTR_SIZE | ATTR_FILE; + + mutex_lock(&object->backer->f_dentry->d_inode->i_mutex); + ret = notify_change(object->backer->f_dentry, &newattrs); + mutex_unlock(&object->backer->f_dentry->d_inode->i_mutex); + + if (ret == -EIO) { + cachefiles_io_error_obj(object, "Size set failed"); + ret = -ENOBUFS; + } + + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_set_i_size() */ + +/*****************************************************************************/ +/* + * see if we have space for a number of pages in the cache + */ +int cachefiles_has_space(struct cachefiles_cache *cache, unsigned nr) +{ + struct kstatfs stats; + int ret; + + _enter("{%llu,%llu,%llu},%d", + cache->brun, cache->bcull, cache->bstop, nr); + + /* find out how many pages of blockdev are available */ + memset(&stats, 0, sizeof(stats)); + + ret = cache->mnt->mnt_sb->s_op->statfs(cache->mnt->mnt_root, &stats); + if (ret < 0) { + if (ret == -EIO) + cachefiles_io_error(cache, "statfs failed"); + return ret; + } + + stats.f_bavail >>= cache->bshift; + + _debug("avail %llu", stats.f_bavail); + + /* see if there is sufficient space */ + stats.f_bavail -= nr; + + ret = -ENOBUFS; + if (stats.f_bavail < cache->bstop) + goto begin_cull; + + ret = 0; + if (stats.f_bavail < cache->bcull) + goto begin_cull; + + if (test_bit(CACHEFILES_CULLING, &cache->flags) && + stats.f_bavail >= cache->brun + ) { + if (test_and_clear_bit(CACHEFILES_CULLING, &cache->flags)) { + _debug("cease culling"); + send_sigurg(&cache->cachefilesd->f_owner); + } + } + + _leave(" = 0"); + return 0; + +begin_cull: + if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) { + _debug("### CULL CACHE ###"); + send_sigurg(&cache->cachefilesd->f_owner); + } + + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_has_space() */ + +/*****************************************************************************/ +/* + * waiting reading backing files + */ +static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode, + int sync, void *_key) +{ + struct cachefiles_one_read *monitor = + container_of(wait, struct cachefiles_one_read, monitor); + struct wait_bit_key *key = _key; + struct page *page = wait->private; + + ASSERT(key); + + _enter("{%lu},%u,%d,{%p,%u}", + monitor->netfs_page->index, mode, sync, + key->flags, 
key->bit_nr); + + if (key->flags != &page->flags || + key->bit_nr != PG_locked) + return 0; + + _debug("--- monitor %p %lx ---", page, page->flags); + + if (!PageUptodate(page) && !PageError(page)) + dump_stack(); + + /* remove from the waitqueue */ + list_del(&wait->task_list); + + /* move onto the action list and queue for keventd */ + ASSERT(monitor->object); + + spin_lock(&monitor->object->work_lock); + list_move(&monitor->obj_link, &monitor->object->read_list); + spin_unlock(&monitor->object->work_lock); + + schedule_work(&monitor->object->read_work); + + return 0; + +} /* end cachefiles_read_waiter() */ + +/*****************************************************************************/ +/* + * let keventd drive the copying of pages + */ +void cachefiles_read_copier_work(void *_object) +{ + struct cachefiles_one_read *monitor; + struct cachefiles_object *object = _object; + struct fscache_cookie *cookie = object->fscache.cookie; + struct pagevec pagevec; + int error, max; + + _enter("{ino=%lu}", object->backer->f_dentry->d_inode->i_ino); + + pagevec_init(&pagevec, 0); + + max = 8; + spin_lock_irq(&object->work_lock); + + while (!list_empty(&object->read_list)) { + monitor = list_entry(object->read_list.next, + struct cachefiles_one_read, obj_link); + list_del(&monitor->obj_link); + + spin_unlock_irq(&object->work_lock); + + _debug("- copy {%lu}", monitor->back_page->index); + + error = -EIO; + if (PageUptodate(monitor->back_page)) { + copy_highpage(monitor->netfs_page, monitor->back_page); + + pagevec_add(&pagevec, monitor->netfs_page); + cookie->def->mark_pages_cached( + cookie->netfs_data, + monitor->netfs_page->mapping, + &pagevec); + pagevec_reinit(&pagevec); + + error = 0; + } + + if (error) + cachefiles_io_error_obj( + object, + "readpage failed on backing file %lx", + (unsigned long) monitor->back_page->flags); + + page_cache_release(monitor->back_page); + + monitor->end_io_func(monitor->netfs_page, + monitor->context, + error); + + page_cache_release(monitor->netfs_page); + fscache_put_context(cookie, monitor->context); + kfree(monitor); + + /* let keventd have some air occasionally */ + max--; + if (max < 0 || need_resched()) { + if (!list_empty(&object->read_list)) + schedule_work(&object->read_work); + _leave(" [maxed out]"); + return; + } + + spin_lock_irq(&object->work_lock); + } + + spin_unlock_irq(&object->work_lock); + + _leave(""); + +} /* end cachefiles_read_copier_work() */ + +/*****************************************************************************/ +/* + * read the corresponding page to the given set from the backing file + * - an uncertain page is simply discarded, to be tried again another time + */ +static int cachefiles_read_backing_file_one(struct cachefiles_object *object, + fscache_rw_complete_t end_io_func, + void *context, + struct page *netpage, + struct pagevec *lru_pvec) +{ + struct cachefiles_one_read *monitor; + struct address_space *bmapping; + struct page *newpage, *backpage; + int ret; + + _enter(""); + + ASSERTCMP(pagevec_count(lru_pvec), ==, 0); + pagevec_reinit(lru_pvec); + + _debug("read back %p{%lu,%d}", + netpage, netpage->index, page_count(netpage)); + + monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); + if (!monitor) + goto nomem; + + monitor->netfs_page = netpage; + monitor->object = object; + monitor->end_io_func = end_io_func; + monitor->context = fscache_get_context(object->fscache.cookie, + context); + + init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter); + + /* attempt to get hold of the backing page */ + 
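/* Probe the backing inode's pagecache for a page at the netfs page's
+	 * index.  If no page is there, allocate a cold page and try to insert
+	 * it; -EEXIST from add_to_page_cache() means another task installed a
+	 * page concurrently, in which case the lookup is simply retried. */
+	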
bmapping = object->backer->f_mapping; + newpage = NULL; + + for (;;) { + backpage = find_get_page(bmapping, netpage->index); + if (backpage) + goto backing_page_already_present; + + if (!newpage) { + newpage = page_cache_alloc_cold(bmapping); + if (!newpage) + goto nomem_monitor; + } + + ret = add_to_page_cache(newpage, bmapping, + netpage->index, GFP_KERNEL); + if (ret == 0) + goto installed_new_backing_page; + if (ret != -EEXIST) + goto nomem_page; + } + + /* we've installed a new backing page, so now we need to add it + * to the LRU list and start it reading */ +installed_new_backing_page: + _debug("- new %p", newpage); + + backpage = newpage; + newpage = NULL; + + page_cache_get(backpage); + pagevec_add(lru_pvec, backpage); + __pagevec_lru_add(lru_pvec); + + ret = bmapping->a_ops->readpage(object->backer, backpage); + if (ret < 0) + goto read_error; + + /* set the monitor to transfer the data across */ +monitor_backing_page: + _debug("- monitor add"); + + /* install the monitor */ + page_cache_get(monitor->netfs_page); + page_cache_get(backpage); + monitor->back_page = backpage; + + spin_lock_irq(&object->work_lock); + list_add_tail(&monitor->obj_link, &object->read_pend_list); + spin_unlock_irq(&object->work_lock); + + monitor->monitor.private = backpage; + install_page_waitqueue_monitor(backpage, &monitor->monitor); + monitor = NULL; + + /* but the page may have been read before the monitor was + * installed, so the monitor may miss the event - so we have to + * ensure that we do get one in such a case */ + if (!TestSetPageLocked(backpage)) + unlock_page(backpage); + goto success; + + /* if the backing page is already present, it can be in one of + * three states: read in progress, read failed or read okay */ +backing_page_already_present: + _debug("- present"); + + if (newpage) { + page_cache_release(newpage); + newpage = NULL; + } + + if (PageError(backpage)) + goto io_error; + + if (PageUptodate(backpage)) + goto backing_page_already_uptodate; + + goto monitor_backing_page; + + /* the backing page is already up to date, attach the netfs + * page to the pagecache and LRU and copy the data across */ +backing_page_already_uptodate: + _debug("- uptodate"); + + copy_highpage(netpage, backpage); + end_io_func(netpage, context, 0); + +success: + _debug("success"); + ret = 0; + +out: + if (backpage) + page_cache_release(backpage); + if (monitor) { + fscache_put_context(object->fscache.cookie, monitor->context); + kfree(monitor); + } + + _leave(" = %d", ret); + return ret; + +read_error: + _debug("read error %d", ret); + if (ret == -ENOMEM) + goto out; +io_error: + cachefiles_io_error_obj(object, "page read error on backing file"); + ret = -EIO; + goto out; + +nomem_page: + page_cache_release(newpage); +nomem_monitor: + fscache_put_context(object->fscache.cookie, monitor->context); + kfree(monitor); +nomem: + _leave(" = -ENOMEM"); + return -ENOMEM; + +} /* end cachefiles_read_backing_file_one() */ + +/*****************************************************************************/ +/* + * read a page from the cache or allocate a block in which to store it + * - cache withdrawal is prevented by the caller + * - returns -EINTR if interrupted + * - returns -ENOMEM if ran out of memory + * - returns -ENOBUFS if no buffers can be made available + * - returns -ENOBUFS if page is beyond EOF + * - if the page is backed by a block in the cache: + * - a read will be started which will call the callback on completion + * - 0 will be returned + * - else if the page is unbacked: + * - the metadata will 
be retained + * - -ENODATA will be returned + */ +static int cachefiles_read_or_alloc_page(struct fscache_object *_object, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + unsigned long gfp) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct fscache_cookie *cookie; + struct pagevec pagevec; + struct inode *inode; + sector_t block0, block; + unsigned shift; + int ret; + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, struct cachefiles_cache, cache); + + _enter("{%p},{%lx},,,", object, page->index); + + if (!object->backer) + return -ENOBUFS; + + inode = object->backer->f_dentry->d_inode; + ASSERT(S_ISREG(inode->i_mode)); + ASSERT(inode->i_mapping->a_ops->bmap); + ASSERT(inode->i_mapping->a_ops->readpages); + + /* calculate the shift required to use bmap */ + if (inode->i_sb->s_blocksize > PAGE_SIZE) + return -ENOBUFS; + + shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; + + cookie = object->fscache.cookie; + + pagevec_init(&pagevec, 0); + + /* we assume the absence or presence of the first block is a good + * enough indication for the page as a whole + * - TODO: don't use bmap() for this as it is _not_ actually good + * enough for this as it doesn't indicate errors, but it's all we've + * got for the moment + */ + block0 = page->index; + block0 <<= shift; + + block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0); + _debug("%llx -> %llx", block0, block); + + if (block) { + /* submit the apparently valid page to the backing fs to be + * read from disk */ + ret = cachefiles_read_backing_file_one(object, + end_io_func, + context, + page, + &pagevec); + ret = 0; + } + else if (cachefiles_has_space(cache, 1) == 0) { + /* there's space in the cache we can use */ + pagevec_add(&pagevec, page); + cookie->def->mark_pages_cached(cookie->netfs_data, + page->mapping, &pagevec); + ret = -ENODATA; + } + else { + ret = -ENOBUFS; + } + + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_read_or_alloc_page() */ + +/*****************************************************************************/ +/* + * read the corresponding pages to the given set from the backing file + * - any uncertain pages are simply discarded, to be tried again another time + */ +static int cachefiles_read_backing_file(struct cachefiles_object *object, + fscache_rw_complete_t end_io_func, + void *context, + struct address_space *mapping, + struct list_head *list, + struct pagevec *lru_pvec) +{ + struct cachefiles_one_read *monitor = NULL; + struct address_space *bmapping = object->backer->f_mapping; + struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; + int ret = 0; + + _enter(""); + + ASSERTCMP(pagevec_count(lru_pvec), ==, 0); + pagevec_reinit(lru_pvec); + + list_for_each_entry_safe(netpage, _n, list, lru) { + list_del(&netpage->lru); + + _debug("read back %p{%lu,%d}", + netpage, netpage->index, page_count(netpage)); + + if (!monitor) { + monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); + if (!monitor) + goto nomem; + + monitor->object = object; + monitor->end_io_func = end_io_func; + monitor->context = fscache_get_context( + object->fscache.cookie, context); + + init_waitqueue_func_entry(&monitor->monitor, + cachefiles_read_waiter); + } + + for (;;) { + backpage = find_get_page(bmapping, netpage->index); + if (backpage) + goto backing_page_already_present; + + if (!newpage) { + newpage = page_cache_alloc_cold(bmapping); + if (!newpage) + goto nomem; + } + + ret = 
add_to_page_cache(newpage, bmapping, + netpage->index, GFP_KERNEL); + if (ret == 0) + goto installed_new_backing_page; + if (ret != -EEXIST) + goto nomem; + } + + /* we've installed a new backing page, so now we need to add it + * to the LRU list and start it reading */ + installed_new_backing_page: + _debug("- new %p", newpage); + + backpage = newpage; + newpage = NULL; + + page_cache_get(backpage); + if (!pagevec_add(lru_pvec, backpage)) + __pagevec_lru_add(lru_pvec); + + reread_backing_page: + ret = bmapping->a_ops->readpage(object->backer, backpage); + if (ret < 0) + goto read_error; + + /* add the netfs page to the pagecache and LRU, and set the + * monitor to transfer the data across */ + monitor_backing_page: + _debug("- monitor add"); + + ret = add_to_page_cache(netpage, mapping, netpage->index, + GFP_KERNEL); + if (ret < 0) { + if (ret == -EEXIST) { + page_cache_release(netpage); + continue; + } + goto nomem; + } + + page_cache_get(netpage); + if (!pagevec_add(lru_pvec, netpage)) + __pagevec_lru_add(lru_pvec); + + /* install a monitor */ + page_cache_get(netpage); + monitor->netfs_page = netpage; + + page_cache_get(backpage); + monitor->back_page = backpage; + + spin_lock_irq(&object->work_lock); + list_add_tail(&monitor->obj_link, &object->read_pend_list); + spin_unlock_irq(&object->work_lock); + + monitor->monitor.private = backpage; + install_page_waitqueue_monitor(backpage, &monitor->monitor); + monitor = NULL; + + /* but the page may have been read before the monitor was + * installed, so the monitor may miss the event - so we have to + * ensure that we do get one in such a case */ + if (!TestSetPageLocked(backpage)) { + _debug("2unlock %p", backpage); + unlock_page(backpage); + } + + page_cache_release(backpage); + backpage = NULL; + + page_cache_release(netpage); + netpage = NULL; + continue; + + /* if the backing page is already present, it can be in one of + * three states: read in progress, read failed or read okay */ + backing_page_already_present: + _debug("- present %p", backpage); + + if (PageError(backpage)) + goto io_error; + + if (PageUptodate(backpage)) + goto backing_page_already_uptodate; + + _debug("- not ready %p{%lx}", backpage, backpage->flags); + + if (TestSetPageLocked(backpage)) + goto monitor_backing_page; + + if (PageError(backpage)) { + unlock_page(backpage); + goto io_error; + } + + if (PageUptodate(backpage)) + goto backing_page_already_uptodate_unlock; + + /* we've locked a page that's neither up to date nor erroneous, + * so we need to attempt to read it again */ + goto reread_backing_page; + + /* the backing page is already up to date, attach the netfs + * page to the pagecache and LRU and copy the data across */ + backing_page_already_uptodate_unlock: + unlock_page(backpage); + backing_page_already_uptodate: + _debug("- uptodate"); + + ret = add_to_page_cache(netpage, mapping, netpage->index, + GFP_KERNEL); + if (ret < 0) { + if (ret == -EEXIST) { + page_cache_release(netpage); + continue; + } + goto nomem; + } + + copy_highpage(netpage, backpage); + + page_cache_release(backpage); + backpage = NULL; + + page_cache_get(netpage); + if (!pagevec_add(lru_pvec, netpage)) + __pagevec_lru_add(lru_pvec); + + end_io_func(netpage, context, 0); + + page_cache_release(netpage); + netpage = NULL; + continue; + } + + netpage = NULL; + + _debug("out"); + +out: + /* tidy up */ + pagevec_lru_add(lru_pvec); + + if (newpage) + page_cache_release(newpage); + if (netpage) + page_cache_release(netpage); + if (backpage) + page_cache_release(backpage); + if (monitor) 
{ + fscache_put_context(object->fscache.cookie, monitor->context); + kfree(monitor); + } + + list_for_each_entry_safe(netpage, _n, list, lru) { + list_del(&netpage->lru); + page_cache_release(netpage); + } + + _leave(" = %d", ret); + return ret; + +nomem: + _debug("nomem"); + ret = -ENOMEM; + goto out; + +read_error: + _debug("read error %d", ret); + if (ret == -ENOMEM) + goto out; +io_error: + cachefiles_io_error_obj(object, "page read error on backing file"); + ret = -EIO; + goto out; + +} /* end cachefiles_read_backing_file() */ + +/*****************************************************************************/ +/* + * read a list of pages from the cache or allocate blocks in which to store + * them + */ +static int cachefiles_read_or_alloc_pages(struct fscache_object *_object, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t end_io_func, + void *context, + unsigned long gfp) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct fscache_cookie *cookie; + struct list_head backpages; + struct pagevec pagevec; + struct inode *inode; + struct page *page, *_n; + unsigned shift, nrbackpages; + int ret, ret2, space; + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, struct cachefiles_cache, cache); + + _enter("{%p},,%d,,", object, *nr_pages); + + if (!object->backer) + return -ENOBUFS; + + space = 1; + if (cachefiles_has_space(cache, *nr_pages) < 0) + space = 0; + + inode = object->backer->f_dentry->d_inode; + ASSERT(S_ISREG(inode->i_mode)); + ASSERT(inode->i_mapping->a_ops->bmap); + ASSERT(inode->i_mapping->a_ops->readpages); + + /* calculate the shift required to use bmap */ + if (inode->i_sb->s_blocksize > PAGE_SIZE) + return -ENOBUFS; + + shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; + + pagevec_init(&pagevec, 0); + + cookie = object->fscache.cookie; + + INIT_LIST_HEAD(&backpages); + nrbackpages = 0; + + ret = space ? -ENODATA : -ENOBUFS; + list_for_each_entry_safe(page, _n, pages, lru) { + sector_t block0, block; + + /* we assume the absence or presence of the first block is a + * good enough indication for the page as a whole + * - TODO: don't use bmap() for this as it is _not_ actually + * good enough for this as it doesn't indicate errors, but + * it's all we've got for the moment + */ + block0 = page->index; + block0 <<= shift; + + block = inode->i_mapping->a_ops->bmap(inode->i_mapping, + block0); + _debug("%llx -> %llx", block0, block); + + if (block) { + /* we have data - add it to the list to give to the + * backing fs */ + list_move(&page->lru, &backpages); + (*nr_pages)--; + nrbackpages++; + } + else if (space && pagevec_add(&pagevec, page) == 0) { + cookie->def->mark_pages_cached(cookie->netfs_data, + mapping, &pagevec); + pagevec_reinit(&pagevec); + ret = -ENODATA; + } + } + + if (pagevec_count(&pagevec) > 0) { + cookie->def->mark_pages_cached(cookie->netfs_data, + mapping, &pagevec); + pagevec_reinit(&pagevec); + } + + if (list_empty(pages)) + ret = 0; + + /* submit the apparently valid pages to the backing fs to be read from disk */ + if (nrbackpages > 0) { + ret2 = cachefiles_read_backing_file(object, + end_io_func, + context, + mapping, + &backpages, + &pagevec); + + ASSERTCMP(pagevec_count(&pagevec), ==, 0); + + if (ret2 == -ENOMEM || ret2 == -EINTR) + ret = ret2; + } + + _leave(" = %d [nr=%u%s]", + ret, *nr_pages, list_empty(pages) ? 
" empty" : ""); + return ret; + +} /* end cachefiles_read_or_alloc_pages() */ + +/*****************************************************************************/ +/* + * read a page from the cache or allocate a block in which to store it + * - cache withdrawal is prevented by the caller + * - returns -EINTR if interrupted + * - returns -ENOMEM if ran out of memory + * - returns -ENOBUFS if no buffers can be made available + * - returns -ENOBUFS if page is beyond EOF + * - otherwise: + * - the metadata will be retained + * - 0 will be returned + */ +static int cachefiles_allocate_page(struct fscache_object *_object, + struct page *page, + unsigned long gfp) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("%p,{%lx},,,", object, page->index); + + return cachefiles_has_space(cache, 1); + +} /* end cachefiles_allocate_page() */ + +/*****************************************************************************/ +/* + * page storer + */ +void cachefiles_write_work(void *_object) +{ + struct cachefiles_one_write *writer; + struct cachefiles_object *object = _object; + int ret, max; + + _enter("%p", object); + + ASSERT(!irqs_disabled()); + + spin_lock_irq(&object->work_lock); + max = 8; + + while (!list_empty(&object->write_list)) { + writer = list_entry(object->write_list.next, + struct cachefiles_one_write, obj_link); + list_del(&writer->obj_link); + + spin_unlock_irq(&object->work_lock); + + _debug("- store {%lu}", writer->netfs_page->index); + + ret = generic_file_buffered_write_one_kernel_page( + object->backer, + writer->netfs_page->index, + writer->netfs_page); + + if (ret == -ENOSPC) { + ret = -ENOBUFS; + } + else if (ret == -EIO) { + cachefiles_io_error_obj(object, + "write page to backing file" + " failed"); + ret = -ENOBUFS; + } + + _debug("- callback"); + writer->end_io_func(writer->netfs_page, + writer->context, + ret); + + _debug("- put net"); + page_cache_release(writer->netfs_page); + fscache_put_context(object->fscache.cookie, writer->context); + kfree(writer); + + /* let keventd have some air occasionally */ + max--; + if (max < 0 || need_resched()) { + if (!list_empty(&object->write_list)) + schedule_work(&object->write_work); + _leave(" [maxed out]"); + return; + } + + _debug("- next"); + spin_lock_irq(&object->work_lock); + } + + spin_unlock_irq(&object->work_lock); + _leave(""); + +} /* end cachefiles_write_work() */ + +/*****************************************************************************/ +/* + * request a page be stored in the cache + * - cache withdrawal is prevented by the caller + * - this request may be ignored if there's no cache block available, in which + * case -ENOBUFS will be returned + * - if the op is in progress, 0 will be returned + */ +static int cachefiles_write_page(struct fscache_object *_object, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + unsigned long gfp) +{ +// struct cachefiles_one_write *writer; + struct cachefiles_object *object; + int ret; + + object = container_of(_object, struct cachefiles_object, fscache); + + _enter("%p,%p{%lx},,,", object, page, page->index); + + if (!object->backer) + return -ENOBUFS; + + ASSERT(S_ISREG(object->backer->f_dentry->d_inode->i_mode)); + +#if 0 // set to 1 for deferred writing + /* queue the operation for deferred processing by keventd */ + writer = kzalloc(sizeof(*writer), GFP_KERNEL); + if 
(!writer) + return -ENOMEM; + + page_cache_get(page); + writer->netfs_page = page; + writer->object = object; + writer->end_io_func = end_io_func; + writer->context = facache_get_context(object->fscache.cookie, context); + + spin_lock_irq(&object->work_lock); + list_add_tail(&writer->obj_link, &object->write_list); + spin_unlock_irq(&object->work_lock); + + schedule_work(&object->write_work); + ret = 0; + +#else + /* copy the page to ext3 and let it store it in its own time */ + ret = generic_file_buffered_write_one_kernel_page(object->backer, + page->index, + page); + + if (ret != 0) { + if (ret == -EIO) + cachefiles_io_error_obj(object, + "write page to backing file" + " failed"); + ret = -ENOBUFS; + } + + end_io_func(page, context, ret); +#endif + + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_write_page() */ + +/*****************************************************************************/ +/* + * detach a backing block from a page + * - cache withdrawal is prevented by the caller + */ +static void cachefiles_uncache_pages(struct fscache_object *_object, + struct pagevec *pagevec) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("%p,{%lu,%lx},,,", + object, pagevec->nr, pagevec->pages[0]->index); + +} /* end cachefiles_uncache_pages() */ + +/*****************************************************************************/ +/* + * dissociate a cache from all the pages it was backing + */ +static void cachefiles_dissociate_pages(struct fscache_cache *cache) +{ + _enter(""); + +} /* end cachefiles_dissociate_pages() */ + +struct fscache_cache_ops cachefiles_cache_ops = { + .name = "cachefiles", + .lookup_object = cachefiles_lookup_object, + .grab_object = cachefiles_grab_object, + .lock_object = cachefiles_lock_object, + .unlock_object = cachefiles_unlock_object, + .update_object = cachefiles_update_object, + .put_object = cachefiles_put_object, + .sync_cache = cachefiles_sync_cache, + .set_i_size = cachefiles_set_i_size, + .read_or_alloc_page = cachefiles_read_or_alloc_page, + .read_or_alloc_pages = cachefiles_read_or_alloc_pages, + .allocate_page = cachefiles_allocate_page, + .write_page = cachefiles_write_page, + .uncache_pages = cachefiles_uncache_pages, + .dissociate_pages = cachefiles_dissociate_pages, +}; diff --git a/fs/cachefiles/cf-key.c b/fs/cachefiles/cf-key.c new file mode 100644 index 0000000..86d7fe7 --- /dev/null +++ b/fs/cachefiles/cf-key.c @@ -0,0 +1,160 @@ +/* cf-key.c: Key to pathname encoder + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include "internal.h" + +static const char cachefiles_charmap[64] = + "0123456789" /* 0 - 9 */ + "abcdefghijklmnopqrstuvwxyz" /* 10 - 35 */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* 36 - 61 */ + "_-" /* 62 - 63 */ + ; + +static const char cachefiles_filecharmap[256] = { + /* we skip space and tab and control chars */ + [ 33 ... 46 ] = 1, /* '!' -> '.' */ + /* we skip '/' as it's significant to pathwalk */ + [ 48 ... 
127 ] = 1, /* '0' -> '~' */ +}; + +/*****************************************************************************/ +/* + * turn the raw key into something cooked + * - the raw key should include the length in the two bytes at the front + * - the key may be up to 514 bytes in length (including the length word) + * - "base64" encode the strange keys, mapping 3 bytes of raw to four of + * cooked + * - need to cut the cooked key into 252 char lengths (189 raw bytes) + */ +char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type) +{ + unsigned char csum, ch; + unsigned int acc; + char *key; + int loop, len, max, seg, mark, print; + + _enter(",%d", keylen); + + BUG_ON(keylen < 2 || keylen > 514); + + csum = raw[0] + raw[1]; + print = 1; + for (loop = 2; loop < keylen; loop++) { + ch = raw[loop]; + csum += ch; + print &= cachefiles_filecharmap[ch]; + } + + if (print) { + /* if the path is usable ASCII, then we render it directly */ + max = keylen - 2; + max += 2; /* two base64'd length chars on the front */ + max += 5; /* @checksum/M */ + max += 3 * 2; /* maximum number of segment dividers (".../M") + * is ((514 + 251) / 252) = 3 + */ + max += 1; /* NUL on end */ + } + else { + /* calculate the maximum length of the cooked key */ + keylen = (keylen + 2) / 3; + + max = keylen * 4; + max += 5; /* @checksum/M */ + max += 3 * 2; /* maximum number of segment dividers (".../M") + * is ((514 + 188) / 189) = 3 + */ + max += 1; /* NUL on end */ + } + + _debug("max: %d", max); + + key = kmalloc(max, GFP_KERNEL); + if (!key) + return NULL; + + len = 0; + + /* build the cooked key */ + sprintf(key, "@%02x/+", (unsigned) csum); + len = 5; + mark = len - 1; + + if (print) { + acc = *(uint16_t *) raw; + raw += 2; + + key[len + 1] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len] = cachefiles_charmap[acc & 63]; + len += 2; + + seg = 250; + for (loop = keylen; loop > 0; loop--) { + if (seg <= 0) { + key[len++] = '/'; + mark = len; + key[len++] = '+'; + seg = 252; + } + + key[len++] = *raw++; + ASSERT(len < max); + } + + switch (type) { + case FSCACHE_COOKIE_TYPE_INDEX: type = 'I'; break; + case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'D'; break; + default: type = 'S'; break; + } + } + else { + seg = 252; + for (loop = keylen; loop > 0; loop--) { + if (seg <= 0) { + key[len++] = '/'; + mark = len; + key[len++] = '+'; + seg = 252; + } + + acc = *raw++; + acc |= *raw++ << 8; + acc |= *raw++ << 16; + + _debug("acc: %06x", acc); + + key[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len++] = cachefiles_charmap[acc & 63]; + + ASSERT(len < max); + } + + switch (type) { + case FSCACHE_COOKIE_TYPE_INDEX: type = 'J'; break; + case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'E'; break; + default: type = 'T'; break; + } + } + + key[mark] = type; + key[len] = 0; + + _leave(" = %p %d:[%s]", key, len, key); + return key; + +} /* end cachefiles_cook_key() */ diff --git a/fs/cachefiles/cf-main.c b/fs/cachefiles/cf-main.c new file mode 100644 index 0000000..660b544 --- /dev/null +++ b/fs/cachefiles/cf-main.c @@ -0,0 +1,131 @@ +/* cf-main.c: network filesystem caching backend to use cache files on a + * premounted filesystem + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +unsigned long cachefiles_debug; + +static int cachefiles_init(void); +static void cachefiles_exit(void); + +fs_initcall(cachefiles_init); +module_exit(cachefiles_exit); + +MODULE_DESCRIPTION("Mounted-filesystem based cache"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +kmem_cache_t *cachefiles_object_jar; + +static void cachefiles_object_init_once(void *_object, kmem_cache_t *cachep, + unsigned long flags) +{ + struct cachefiles_object *object = _object; + + switch (flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) { + case SLAB_CTOR_CONSTRUCTOR: + memset(object, 0, sizeof(*object)); + fscache_object_init(&object->fscache); + init_rwsem(&object->sem); + spin_lock_init(&object->work_lock); + INIT_LIST_HEAD(&object->read_list); + INIT_LIST_HEAD(&object->read_pend_list); + INIT_WORK(&object->read_work, &cachefiles_read_copier_work, + object); + INIT_LIST_HEAD(&object->write_list); + INIT_WORK(&object->write_work, &cachefiles_write_work, object); + break; + + default: + break; + } +} + +/*****************************************************************************/ +/* + * initialise the fs caching module + */ +static int __init cachefiles_init(void) +{ + struct proc_dir_entry *pde; + int ret; + + /* create a proc entry to use as a handle for the userspace daemon */ + ret = -ENOMEM; + + pde = create_proc_entry("cachefiles", 0600, proc_root_fs); + if (!pde) { + kerror("Unable to create /proc/fs/cachefiles"); + goto error_proc; + } + + if (cachefiles_sysctl_init() < 0) { + kerror("Unable to create sysctl parameters"); + goto error_object_jar; + } + + pde->owner = THIS_MODULE; + pde->proc_fops = &cachefiles_proc_fops; + cachefiles_proc = pde; + + /* create an object jar */ + cachefiles_object_jar = + kmem_cache_create("cachefiles_object_jar", + sizeof(struct cachefiles_object), + 0, + SLAB_HWCACHE_ALIGN, + cachefiles_object_init_once, + NULL); + if (!cachefiles_object_jar) { + printk(KERN_NOTICE + "CacheFiles: Failed to allocate an object jar\n"); + goto error_sysctl; + } + + printk(KERN_INFO "CacheFiles: Loaded\n"); + return 0; + +error_sysctl: + cachefiles_sysctl_cleanup(); +error_object_jar: + remove_proc_entry("cachefiles", proc_root_fs); +error_proc: + kerror("failed to register: %d", ret); + return ret; + +} /* end cachefiles_init() */ + +/*****************************************************************************/ +/* + * clean up on module removal + */ +static void __exit cachefiles_exit(void) +{ + printk(KERN_INFO "CacheFiles: Unloading\n"); + + kmem_cache_destroy(cachefiles_object_jar); + remove_proc_entry("cachefiles", proc_root_fs); + cachefiles_sysctl_cleanup(); + +} /* end cachefiles_exit() */ diff --git a/fs/cachefiles/cf-namei.c b/fs/cachefiles/cf-namei.c new file mode 100644 index 0000000..45e2359 --- /dev/null +++ b/fs/cachefiles/cf-namei.c @@ -0,0 +1,837 @@ +/* cf-namei.c: CacheFiles path walking and related routines + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +/*****************************************************************************/ +/* + * record the fact that an object is now active + */ +static void cachefiles_mark_object_active(struct cachefiles_cache *cache, + struct cachefiles_object *object) +{ + struct cachefiles_object *xobject; + struct rb_node **_p, *_parent = NULL; + struct dentry *dentry; + + write_lock(&cache->active_lock); + + dentry = object->dentry; + _p = &cache->active_nodes.rb_node; + while (*_p) { + _parent = *_p; + xobject = rb_entry(_parent, + struct cachefiles_object, active_node); + + if (xobject->dentry > dentry) + _p = &(*_p)->rb_left; + else if (xobject->dentry < dentry) + _p = &(*_p)->rb_right; + else + BUG(); /* uh oh... this dentry shouldn't be here */ + } + + rb_link_node(&object->active_node, _parent, _p); + rb_insert_color(&object->active_node, &cache->active_nodes); + + write_unlock(&cache->active_lock); + +} /* end cachefiles_mark_object_active() */ + +/*****************************************************************************/ +/* + * delete an object representation from the cache + * - file backed objects are unlinked + * - directory backed objects are stuffed into the graveyard for userspace to + * delete + * - unlocks the directory mutex + */ +static int cachefiles_bury_object(struct cachefiles_cache *cache, + struct dentry *dir, + struct dentry *rep) +{ + struct dentry *grave, *alt, *trap; + struct qstr name; + const char *old_name; + char nbuffer[8 + 8 + 1]; + int ret; + + _enter(",'%*.*s','%*.*s'", + dir->d_name.len, dir->d_name.len, dir->d_name.name, + rep->d_name.len, rep->d_name.len, rep->d_name.name); + + /* non-directories can just be unlinked */ + if (!S_ISDIR(rep->d_inode->i_mode)) { + _debug("unlink stale object"); + ret = dir->d_inode->i_op->unlink(dir->d_inode, rep); + + mutex_unlock(&dir->d_inode->i_mutex); + + if (ret == 0) { + _debug("d_delete"); + d_delete(rep); + } + else if (ret == -EIO) { + cachefiles_io_error(cache, "Unlink failed"); + } + + _leave(" = %d", ret); + return ret; + } + + /* directories have to be moved to the graveyard */ + _debug("move stale object to graveyard"); + mutex_unlock(&dir->d_inode->i_mutex); + +try_again: + /* first step is to make up a grave dentry in the graveyard */ + sprintf(nbuffer, "%08x%08x", + (uint32_t) xtime.tv_sec, + (uint32_t) atomic_inc_return(&cache->gravecounter)); + + name.name = nbuffer; + name.len = strlen(name.name); + + /* hash the name */ + name.hash = full_name_hash(name.name, name.len); + + if (dir->d_op && dir->d_op->d_hash) { + ret = dir->d_op->d_hash(dir, &name); + if (ret < 0) { + if (ret == -EIO) + cachefiles_io_error(cache, "Hash failed"); + + _leave(" = %d", ret); + return ret; + } + } + + /* do the multiway lock magic */ + trap = lock_rename(cache->graveyard, dir); + + /* do some checks before getting the grave dentry */ + if (rep->d_parent != dir) { + /* the entry was probably culled when we dropped the parent dir + * lock */ + unlock_rename(cache->graveyard, dir); + _leave(" = 0 [culled?]"); + return 0; + } + + if (!S_ISDIR(cache->graveyard->d_inode->i_mode)) { + 
unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "Graveyard no longer a directory"); + return -EIO; + } + + if (trap == rep) { + unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "May not make directory loop"); + return -EIO; + } + + if (d_mountpoint(rep)) { + unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "Mountpoint in cache"); + return -EIO; + } + + /* see if there's a dentry already there for this name */ + grave = d_lookup(cache->graveyard, &name); + if (!grave) { + _debug("not found"); + + grave = d_alloc(cache->graveyard, &name); + if (!grave) { + unlock_rename(cache->graveyard, dir); + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + alt = cache->graveyard->d_inode->i_op->lookup( + cache->graveyard->d_inode, grave, NULL); + if (IS_ERR(alt)) { + unlock_rename(cache->graveyard, dir); + dput(grave); + + if (PTR_ERR(alt) == -ENOMEM) { + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + cachefiles_io_error(cache, "Lookup error %ld", + PTR_ERR(alt)); + return -EIO; + } + + if (alt) { + dput(grave); + grave = alt; + } + } + + if (grave->d_inode) { + unlock_rename(cache->graveyard, dir); + dput(grave); + grave = NULL; + cond_resched(); + goto try_again; + } + + if (d_mountpoint(grave)) { + unlock_rename(cache->graveyard, dir); + dput(grave); + cachefiles_io_error(cache, "Mountpoint in graveyard"); + return -EIO; + } + + /* target should not be an ancestor of source */ + if (trap == grave) { + unlock_rename(cache->graveyard, dir); + dput(grave); + cachefiles_io_error(cache, "May not make directory loop"); + return -EIO; + } + + /* attempt the rename */ + DQUOT_INIT(dir->d_inode); + DQUOT_INIT(cache->graveyard->d_inode); + + old_name = fsnotify_oldname_init(rep->d_name.name); + + ret = dir->d_inode->i_op->rename(dir->d_inode, rep, + cache->graveyard->d_inode, grave); + + if (ret == 0) { + d_move(rep, grave); + fsnotify_move(dir->d_inode, cache->graveyard->d_inode, + old_name, rep->d_name.name, 1, + grave->d_inode, rep->d_inode); + } + else if (ret != -ENOMEM) { + cachefiles_io_error(cache, "Rename failed with error %d", ret); + } + + fsnotify_oldname_free(old_name); + + unlock_rename(cache->graveyard, dir); + dput(grave); + _leave(" = 0"); + return 0; + +} /* end cachefiles_bury_object() */ + +/*****************************************************************************/ +/* + * delete an object representation from the cache + */ +int cachefiles_delete_object(struct cachefiles_cache *cache, + struct cachefiles_object *object) +{ + struct dentry *dir; + int ret; + + _enter(",{%p}", object->dentry); + + ASSERT(object->dentry); + ASSERT(object->dentry->d_inode); + ASSERT(object->dentry->d_parent); + + dir = dget_parent(object->dentry); + + mutex_lock(&dir->d_inode->i_mutex); + ret = cachefiles_bury_object(cache, dir, object->dentry); + + dput(dir); + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_delete_object() */ + +/*****************************************************************************/ +/* + * walk from the parent object to the child object through the backing + * filesystem, creating directories as we go + */ +int cachefiles_walk_to_object(struct cachefiles_object *parent, + struct cachefiles_object *object, + char *key, + struct cachefiles_xattr *auxdata) +{ + struct cachefiles_cache *cache; + struct dentry *dir, *next = NULL, *new; + struct file *file; + struct qstr name; + uid_t fsuid; + gid_t fsgid; + int ret; + + _enter("{%p}", parent->dentry); + + cache = container_of(parent->fscache.cache, + struct 
cachefiles_cache, cache); + + ASSERT(parent->dentry); + ASSERT(parent->dentry->d_inode); + + if (!(S_ISDIR(parent->dentry->d_inode->i_mode))) { + // TODO: convert file to dir + _leave("looking up in none directory"); + return -ENOBUFS; + } + + fsuid = current->fsuid; + fsgid = current->fsgid; + current->fsuid = 0; + current->fsgid = 0; + + dir = dget(parent->dentry); + +advance: + /* attempt to transit the first directory component */ + name.name = key; + key = strchr(key, '/'); + if (key) { + name.len = key - (char *) name.name; + *key++ = 0; + } + else { + name.len = strlen(name.name); + } + + /* hash the name */ + name.hash = full_name_hash(name.name, name.len); + + if (dir->d_op && dir->d_op->d_hash) { + ret = dir->d_op->d_hash(dir, &name); + if (ret < 0) { + cachefiles_io_error(cache, "Hash failed"); + goto error_out2; + } + } + +lookup_again: + /* search the current directory for the element name */ + _debug("lookup '%s' %x", name.name, name.hash); + + mutex_lock(&dir->d_inode->i_mutex); + + next = d_lookup(dir, &name); + if (!next) { + _debug("not found"); + + new = d_alloc(dir, &name); + if (!new) + goto nomem_d_alloc; + + ASSERT(dir->d_inode->i_op); + ASSERT(dir->d_inode->i_op->lookup); + + next = dir->d_inode->i_op->lookup(dir->d_inode, new, NULL); + if (IS_ERR(next)) + goto lookup_error; + + if (!next) + next = new; + else + dput(new); + + if (next->d_inode) { + ret = -EPERM; + if (!next->d_inode->i_op || + !next->d_inode->i_op->setxattr || + !next->d_inode->i_op->getxattr || + !next->d_inode->i_op->removexattr) + goto error; + + if (key && (!next->d_inode->i_op->lookup || + !next->d_inode->i_op->mkdir || + !next->d_inode->i_op->create || + !next->d_inode->i_op->rename || + !next->d_inode->i_op->rmdir || + !next->d_inode->i_op->unlink)) + goto error; + } + } + + _debug("next -> %p %s", next, next->d_inode ? 
"positive" : "negative"); + + if (!key) + object->new = !next->d_inode; + + /* we need to create the object if it's negative */ + if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) { + /* index objects and intervening tree levels must be subdirs */ + if (!next->d_inode) { + DQUOT_INIT(dir->d_inode); + ret = dir->d_inode->i_op->mkdir(dir->d_inode, next, 0); + if (ret < 0) + goto create_error; + + ASSERT(next->d_inode); + + fsnotify_mkdir(dir->d_inode, next); + + _debug("mkdir -> %p{%p{ino=%lu}}", + next, next->d_inode, next->d_inode->i_ino); + } + /* we need to make sure a positive match found a directory */ + else if (!S_ISDIR(next->d_inode->i_mode)) { + kerror("inode %lu is not a directory", + next->d_inode->i_ino); + ret = -ENOBUFS; + goto error; + } + } + else { + /* non-index objects start out life as files */ + if (!next->d_inode) { + DQUOT_INIT(dir->d_inode); + ret = dir->d_inode->i_op->create(dir->d_inode, next, + S_IFREG, NULL); + if (ret < 0) + goto create_error; + + ASSERT(next->d_inode); + + fsnotify_create(dir->d_inode, next); + + _debug("create -> %p{%p{ino=%lu}}", + next, next->d_inode, next->d_inode->i_ino); + } + /* we need to make sure a positive match found a directory or a + * file */ + else if (!S_ISDIR(next->d_inode->i_mode) && + !S_ISREG(next->d_inode->i_mode) + ) { + kerror("inode %lu is not a file or directory", + next->d_inode->i_ino); + ret = -ENOBUFS; + goto error; + } + } + + /* process the next component */ + if (key) { + _debug("advance"); + mutex_unlock(&dir->d_inode->i_mutex); + dput(dir); + dir = next; + next = NULL; + goto advance; + } + + /* we've found the object we were looking for */ + object->dentry = next; + + /* if we've found that the terminal object exists, then we need to + * check its attributes and delete it if it's out of date */ + if (!object->new) { + _debug("validate '%*.*s'", + next->d_name.len, next->d_name.len, next->d_name.name); + + ret = cachefiles_check_object_xattr(object, auxdata); + if (ret == -ESTALE) { + /* delete the object (the deleter drops the directory + * mutex) */ + object->dentry = NULL; + + ret = cachefiles_bury_object(cache, dir, next); + dput(next); + next = NULL; + + if (ret < 0) + goto delete_error; + + _debug("redo lookup"); + goto lookup_again; + } + } + + /* note that we're now using this object */ + cachefiles_mark_object_active(cache, object); + + mutex_unlock(&dir->d_inode->i_mutex); + dput(dir); + dir = NULL; + + if (object->new) { + /* attach data to a newly constructed terminal object */ + ret = cachefiles_set_object_xattr(object, auxdata); + if (ret < 0) + goto check_error; + } + else { + /* always update the atime on an object we've just looked up + * (this is used to keep track of culling, and atimes are only + * updated by read, write and readdir but not lookup or + * open) */ + touch_atime(cache->mnt, next); + } + + /* open a file interface onto a data file */ + if (object->type != FSCACHE_COOKIE_TYPE_INDEX) { + if (S_ISREG(object->dentry->d_inode->i_mode)) { + file = dentry_open_kernel(dget(object->dentry), + mntget(cache->mnt), 0); + if (IS_ERR(file)) + goto open_error; + + object->backer = file; + } + else { + BUG(); // TODO: open file in data-class subdir + } + } + + current->fsuid = fsuid; + current->fsgid = fsgid; + object->new = 0; + + _leave(" = 0 [%lu]", object->dentry->d_inode->i_ino); + return 0; + +create_error: + if (ret == -EIO) + cachefiles_io_error(cache, "create/mkdir failed"); + goto error; + +open_error: + ret = PTR_ERR(file); +check_error: + write_lock(&cache->active_lock); + 
rb_erase(&object->active_node, &cache->active_nodes); + write_unlock(&cache->active_lock); + + dput(object->dentry); + object->dentry = NULL; + goto error_out; + +delete_error: + _debug("delete error %d", ret); + goto error_out2; + +lookup_error: + _debug("lookup error %ld", PTR_ERR(next)); + dput(new); + ret = PTR_ERR(next); + if (ret == -EIO) + cachefiles_io_error(cache, "Lookup failed"); + next = NULL; + goto error; + +nomem_d_alloc: + ret = -ENOMEM; +error: + mutex_unlock(&dir->d_inode->i_mutex); + dput(next); +error_out2: + dput(dir); +error_out: + current->fsuid = fsuid; + current->fsgid = fsgid; + + _leave(" = ret"); + return ret; + +} /* end cachefiles_walk_to_object() */ + +/*****************************************************************************/ +/* + * get a subdirectory + */ +struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, + struct dentry *dir, + const char *dirname) +{ + struct dentry *subdir, *new; + struct qstr name; + uid_t fsuid; + gid_t fsgid; + int ret; + + _enter(""); + + /* set up the name */ + name.name = dirname; + name.len = strlen(dirname); + name.hash = full_name_hash(name.name, name.len); + + if (dir->d_op && dir->d_op->d_hash) { + ret = dir->d_op->d_hash(dir, &name); + if (ret < 0) { + if (ret == -EIO) + kerror("Hash failed"); + _leave(" = %d", ret); + return ERR_PTR(ret); + } + } + + /* search the current directory for the element name */ + _debug("lookup '%s' %x", name.name, name.hash); + + fsuid = current->fsuid; + fsgid = current->fsgid; + current->fsuid = 0; + current->fsgid = 0; + + mutex_lock(&dir->d_inode->i_mutex); + + subdir = d_lookup(dir, &name); + if (!subdir) { + _debug("not found"); + + new = d_alloc(dir, &name); + if (!new) + goto nomem_d_alloc; + + subdir = dir->d_inode->i_op->lookup(dir->d_inode, new, NULL); + if (IS_ERR(subdir)) + goto lookup_error; + + if (!subdir) + subdir = new; + else + dput(new); + } + + _debug("subdir -> %p %s", + subdir, subdir->d_inode ? 
"positive" : "negative"); + + /* we need to create the subdir if it doesn't exist yet */ + if (!subdir->d_inode) { + DQUOT_INIT(dir->d_inode); + ret = dir->d_inode->i_op->mkdir(dir->d_inode, subdir, 0700); + if (ret < 0) + goto mkdir_error; + + ASSERT(subdir->d_inode); + + fsnotify_mkdir(dir->d_inode, subdir); + + _debug("mkdir -> %p{%p{ino=%lu}}", + subdir, + subdir->d_inode, + subdir->d_inode->i_ino); + } + + mutex_unlock(&dir->d_inode->i_mutex); + + current->fsuid = fsuid; + current->fsgid = fsgid; + + /* we need to make sure the subdir is a directory */ + ASSERT(subdir->d_inode); + + if (!S_ISDIR(subdir->d_inode->i_mode)) { + kerror("%s is not a directory", dirname); + ret = -EIO; + goto check_error; + } + + ret = -EPERM; + if (!subdir->d_inode->i_op || + !subdir->d_inode->i_op->setxattr || + !subdir->d_inode->i_op->getxattr || + !subdir->d_inode->i_op->lookup || + !subdir->d_inode->i_op->mkdir || + !subdir->d_inode->i_op->create || + !subdir->d_inode->i_op->rename || + !subdir->d_inode->i_op->rmdir || + !subdir->d_inode->i_op->unlink) + goto check_error; + + _leave(" = [%lu]", subdir->d_inode->i_ino); + return subdir; + +check_error: + dput(subdir); + _leave(" = %d [check]", ret); + return ERR_PTR(ret); + +mkdir_error: + mutex_unlock(&dir->d_inode->i_mutex); + kerror("mkdir %s failed with error %d", dirname, ret); + goto error_out; + +lookup_error: + mutex_unlock(&dir->d_inode->i_mutex); + dput(new); + ret = PTR_ERR(subdir); + kerror("Lookup %s failed with error %d", dirname, ret); + goto error_out; + +nomem_d_alloc: + mutex_unlock(&dir->d_inode->i_mutex); + ret = -ENOMEM; + goto error_out; + +error_out: + current->fsuid = fsuid; + current->fsgid = fsgid; + _leave(" = %d", ret); + return ERR_PTR(ret); + +} /* end cachefiles_get_directory() */ + +/*****************************************************************************/ +/* + * cull an object if it's not in use + * - called only by cache manager daemon + */ +int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, + char *filename) +{ + struct cachefiles_object *object; + struct rb_node *_n; + struct dentry *victim, *new; + struct qstr name; + int ret; + + _enter(",%*.*s/,%s", + dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); + + /* set up the name */ + name.name = filename; + name.len = strlen(filename); + name.hash = full_name_hash(name.name, name.len); + + if (dir->d_op && dir->d_op->d_hash) { + ret = dir->d_op->d_hash(dir, &name); + if (ret < 0) { + if (ret == -EIO) + cachefiles_io_error(cache, "Hash failed"); + _leave(" = %d", ret); + return ret; + } + } + + /* look up the victim */ + mutex_lock(&dir->d_inode->i_mutex); + + victim = d_lookup(dir, &name); + if (!victim) { + _debug("not found"); + + new = d_alloc(dir, &name); + if (!new) + goto nomem_d_alloc; + + victim = dir->d_inode->i_op->lookup(dir->d_inode, new, NULL); + if (IS_ERR(victim)) + goto lookup_error; + + if (!victim) + victim = new; + else + dput(new); + } + + _debug("victim -> %p %s", victim, victim->d_inode ? 
"positive" : "negative"); + + /* if the object is no longer there then we probably retired the object + * at the netfs's request whilst the cull was in progress + */ + if (!victim->d_inode) { + mutex_unlock(&dir->d_inode->i_mutex); + dput(victim); + _leave(" = -ENOENT [absent]"); + return -ENOENT; + } + + /* check to see if we're using this object */ + read_lock(&cache->active_lock); + + _n = cache->active_nodes.rb_node; + + while (_n) { + object = rb_entry(_n, struct cachefiles_object, active_node); + + if (object->dentry > victim) + _n = _n->rb_left; + else if (object->dentry < victim) + _n = _n->rb_right; + else + goto object_in_use; + } + + read_unlock(&cache->active_lock); + + /* okay... the victim is not being used so we can cull it + * - start by marking it as stale + */ + _debug("victim is cullable"); + + ret = cachefiles_remove_object_xattr(cache, victim); + if (ret < 0) + goto error_unlock; + + /* actually remove the victim (drops the dir mutex) */ + _debug("bury"); + + ret = cachefiles_bury_object(cache, dir, victim); + if (ret < 0) + goto error; + + dput(victim); + _leave(" = 0"); + return 0; + + +object_in_use: + read_unlock(&cache->active_lock); + mutex_unlock(&dir->d_inode->i_mutex); + dput(victim); + _leave(" = -EBUSY [in use]"); + return -EBUSY; + +nomem_d_alloc: + mutex_unlock(&dir->d_inode->i_mutex); + _leave(" = -ENOMEM"); + return -ENOMEM; + +lookup_error: + mutex_unlock(&dir->d_inode->i_mutex); + dput(new); + ret = PTR_ERR(victim); + if (ret == -EIO) + cachefiles_io_error(cache, "Lookup failed"); + goto choose_error; + +error_unlock: + mutex_unlock(&dir->d_inode->i_mutex); +error: + dput(victim); +choose_error: + if (ret == -ENOENT) { + /* file or dir now absent - probably retired by netfs */ + _leave(" = -ESTALE [absent]"); + return -ESTALE; + } + + if (ret != -ENOMEM) { + kerror("Internal error: %d", ret); + ret = -EIO; + } + + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_cull() */ diff --git a/fs/cachefiles/cf-proc.c b/fs/cachefiles/cf-proc.c new file mode 100644 index 0000000..fce1385 --- /dev/null +++ b/fs/cachefiles/cf-proc.c @@ -0,0 +1,510 @@ +/* cf-proc.c: /proc/fs/cachefiles interface + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +static int cachefiles_proc_open(struct inode *, struct file *); +static int cachefiles_proc_release(struct inode *, struct file *); +static ssize_t cachefiles_proc_read(struct file *, char __user *, size_t, loff_t *); +static ssize_t cachefiles_proc_write(struct file *, const char __user *, size_t, loff_t *); +static int cachefiles_proc_brun(struct cachefiles_cache *cache, char *args); +static int cachefiles_proc_bcull(struct cachefiles_cache *cache, char *args); +static int cachefiles_proc_bstop(struct cachefiles_cache *cache, char *args); +static int cachefiles_proc_cull(struct cachefiles_cache *cache, char *args); +static int cachefiles_proc_debug(struct cachefiles_cache *cache, char *args); +static int cachefiles_proc_dir(struct cachefiles_cache *cache, char *args); +static int cachefiles_proc_tag(struct cachefiles_cache *cache, char *args); + +struct proc_dir_entry *cachefiles_proc; + +static unsigned long cachefiles_open; + +struct file_operations cachefiles_proc_fops = { + .open = cachefiles_proc_open, + .release = cachefiles_proc_release, + .read = cachefiles_proc_read, + .write = cachefiles_proc_write, +}; + +struct cachefiles_proc_cmd { + char name[8]; + int (*handler)(struct cachefiles_cache *cache, char *args); +}; + +static const struct cachefiles_proc_cmd cachefiles_proc_cmds[] = { + { "bind", cachefiles_proc_bind }, + { "brun", cachefiles_proc_brun }, + { "bcull", cachefiles_proc_bcull }, + { "bstop", cachefiles_proc_bstop }, + { "cull", cachefiles_proc_cull }, + { "debug", cachefiles_proc_debug }, + { "dir", cachefiles_proc_dir }, + { "tag", cachefiles_proc_tag }, + { "", NULL } +}; + + +/*****************************************************************************/ +/* + * do various checks + */ +static int cachefiles_proc_open(struct inode *inode, struct file *file) +{ + struct cachefiles_cache *cache; + + _enter(""); + + /* only the superuser may do this */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* /proc/fs/cachefiles may only be open once at a time */ + if (xchg(&cachefiles_open, 1) == 1) + return -EBUSY; + + /* allocate a cache record */ + cache = kzalloc(sizeof(struct cachefiles_cache), GFP_KERNEL); + if (!cache) { + cachefiles_open = 0; + return -ENOMEM; + } + + cache->active_nodes = RB_ROOT; + rwlock_init(&cache->active_lock); + + /* set default caching limits + * - limit at 1% free space + * - cull below 5% free space + * - cease culling above 7% free space + */ + cache->brun_percent = 7; + cache->bcull_percent = 5; + cache->bstop_percent = 1; + + file->private_data = cache; + cache->cachefilesd = file; + return 0; + +} /* end cachefiles_proc_open() */ + +/*****************************************************************************/ +/* + * release a cache + */ +static int cachefiles_proc_release(struct inode *inode, struct file *file) +{ + struct cachefiles_cache *cache = file->private_data; + + _enter(""); + + ASSERT(cache); + + set_bit(CACHEFILES_DEAD, &cache->flags); + + cachefiles_proc_unbind(cache); + + ASSERT(!cache->active_nodes.rb_node); + + /* clean up the control file interface */ + cache->cachefilesd = NULL; + file->private_data = NULL; + cachefiles_open = 0; + + kfree(cache); + + _leave(""); + return 0; + +} /* end cachefiles_proc_release() */ + +/*****************************************************************************/ +/* + * read the cache state + */ +static ssize_t 
cachefiles_proc_read(struct file *file, char __user *_buffer,
+ size_t buflen, loff_t *pos)
+{
+ struct cachefiles_cache *cache = file->private_data;
+ char buffer[256];
+ int n;
+
+ _enter(",,%zu,", buflen);
+
+ if (!test_bit(CACHEFILES_READY, &cache->flags))
+ return 0;
+
+ /* check how much space the cache has */
+ cachefiles_has_space(cache, 0);
+
+ /* summarise */
+ n = snprintf(buffer, sizeof(buffer),
+ "cull=%c"
+ " brun=%llx"
+ " bcull=%llx"
+ " bstop=%llx",
+ test_bit(CACHEFILES_CULLING, &cache->flags) ? '1' : '0',
+ cache->brun,
+ cache->bcull,
+ cache->bstop
+ );
+
+ if (n > buflen)
+ return -EMSGSIZE;
+
+ if (copy_to_user(_buffer, buffer, n) != 0)
+ return -EFAULT;
+
+ return n;
+
+} /* end cachefiles_proc_read() */
+
+/*****************************************************************************/
+/*
+ * command the cache
+ */
+static ssize_t cachefiles_proc_write(struct file *file,
+ const char __user *_data, size_t datalen,
+ loff_t *pos)
+{
+ const struct cachefiles_proc_cmd *cmd;
+ struct cachefiles_cache *cache = file->private_data;
+ ssize_t ret;
+ char *data, *args, *cp;
+
+ _enter(",,%zu,", datalen);
+
+ ASSERT(cache);
+
+ if (test_bit(CACHEFILES_DEAD, &cache->flags))
+ return -EIO;
+
+ if (datalen < 0 || datalen > PAGE_SIZE - 1)
+ return -EOPNOTSUPP;
+
+ /* drag the command string into the kernel so we can parse it */
+ data = kmalloc(datalen + 1, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ ret = -EFAULT;
+ if (copy_from_user(data, _data, datalen) != 0)
+ goto error;
+
+ data[datalen] = '\0';
+
+ ret = -EINVAL;
+ if (memchr(data, '\0', datalen))
+ goto error;
+
+ /* strip any newline */
+ cp = memchr(data, '\n', datalen);
+ if (cp) {
+ if (cp == data)
+ goto error;
+
+ *cp = '\0';
+ }
+
+ /* parse the command */
+ ret = -EOPNOTSUPP;
+
+ for (args = data; *args; args++)
+ if (isspace(*args))
+ break;
+ if (*args) {
+ if (args == data)
+ goto error;
+ *args = '\0';
+ for (args++; isspace(*args); args++)
+ continue;
+ }
+
+ /* run the appropriate command handler */
+ for (cmd = cachefiles_proc_cmds; cmd->name[0]; cmd++)
+ if (strcmp(cmd->name, data) == 0)
+ goto found_command;
+
+error:
+ kfree(data);
+ _leave(" = %d", ret);
+ return ret;
+
+found_command:
+ mutex_lock(&file->f_dentry->d_inode->i_mutex);
+
+ ret = -EIO;
+ if (!test_bit(CACHEFILES_DEAD, &cache->flags))
+ ret = cmd->handler(cache, args);
+
+ mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+
+ if (ret == 0)
+ ret = datalen;
+ goto error;
+
+} /* end cachefiles_proc_write() */
+
+/*****************************************************************************/
+/*
+ * give a range error for cache space constraints
+ * - can be tail-called
+ */
+static int cachefiles_proc_range_error(struct cachefiles_cache *cache, char *args)
+{
+ kerror("Free space limits must be in range"
+ " 0%%<=bstop<bcull<brun<100%%");
+
+ return -EINVAL;
+
+} /* end cachefiles_proc_range_error() */
+
+/*****************************************************************************/
+/*
+ * set the percentage of blocks at which to cease culling
+ * - command: "brun <N>%"
+ */
+static int cachefiles_proc_brun(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long brun;
+
+ _enter(",%s", args);
+
+ if (!*args)
+ return -EINVAL;
+
+ brun = simple_strtoul(args, &args, 10);
+ if (args[0] != '%' || args[1] != '\0')
+ return -EINVAL;
+
+ if (brun <= cache->bcull_percent || brun >= 100)
+ return cachefiles_proc_range_error(cache, args);
+
+ cache->brun_percent = brun;
+ return 0;
+
+} /* end cachefiles_proc_brun() */
+
+/*****************************************************************************/
+/*
+ * set the percentage of blocks at which to start culling
+ * - command: "bcull <N>%"
+ */
+static int cachefiles_proc_bcull(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long 
bcull; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + bcull = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (bcull <= cache->bstop_percent || bcull >= cache->brun_percent) + return cachefiles_proc_range_error(cache, args); + + cache->bcull_percent = bcull; + return 0; + +} /* end cachefiles_proc_bcull() */ + +/*****************************************************************************/ +/* + * set the percentage of blocks at which to stop allocating + * - command: "bstop %" + */ +static int cachefiles_proc_bstop(struct cachefiles_cache *cache, char *args) +{ + unsigned long bstop; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + bstop = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (bstop < 0 || bstop >= cache->bcull_percent) + return cachefiles_proc_range_error(cache, args); + + cache->bstop_percent = bstop; + return 0; + +} /* end cachefiles_proc_bstop() */ + +/*****************************************************************************/ +/* + * set the cache directory + * - command: "dir " + */ +static int cachefiles_proc_dir(struct cachefiles_cache *cache, char *args) +{ + char *dir; + + _enter(",%s", args); + + if (!*args) { + kerror("Empty directory specified"); + return -EINVAL; + } + + if (cache->rootdirname) { + kerror("Second cache directory specified"); + return -EEXIST; + } + + dir = kstrdup(args, GFP_KERNEL); + if (!dir) + return -ENOMEM; + + cache->rootdirname = dir; + return 0; + +} /* end cachefiles_proc_dir() */ + +/*****************************************************************************/ +/* + * set the cache tag + * - command: "tag " + */ +static int cachefiles_proc_tag(struct cachefiles_cache *cache, char *args) +{ + char *tag; + + _enter(",%s", args); + + if (!*args) { + kerror("Empty tag specified"); + return -EINVAL; + } + + if (cache->tag) + return -EEXIST; + + tag = kstrdup(args, GFP_KERNEL); + if (!tag) + return -ENOMEM; + + cache->tag = tag; + return 0; + +} /* end cachefiles_proc_tag() */ + +/*****************************************************************************/ +/* + * request a node in the cache be culled + * - command: "cull " + */ +static int cachefiles_proc_cull(struct cachefiles_cache *cache, char *args) +{ + struct dentry *dir; + struct file *dirfile; + int dirfd, fput_needed, ret; + + _enter(",%s", args); + + dirfd = simple_strtoul(args, &args, 0); + + if (!args || !isspace(*args)) + goto inval; + + while (isspace(*args)) + args++; + + if (!*args) + goto inval; + + if (strchr(args, '/')) + goto inval; + + if (!test_bit(CACHEFILES_READY, &cache->flags)) { + kerror("cull applied to unready cache"); + return -EIO; + } + + if (test_bit(CACHEFILES_DEAD, &cache->flags)) { + kerror("cull applied to dead cache"); + return -EIO; + } + + /* extract the directory dentry from the fd */ + dirfile = fget_light(dirfd, &fput_needed); + if (!dirfile) { + kerror("cull dirfd not open"); + return -EBADF; + } + + dir = dget(dirfile->f_dentry); + fput_light(dirfile, fput_needed); + dirfile = NULL; + + if (!S_ISDIR(dir->d_inode->i_mode)) + goto notdir; + + ret = cachefiles_cull(cache, dir, args); + + dput(dir); + _leave(" = %d", ret); + return ret; + +notdir: + dput(dir); + kerror("cull command requires dirfd to be a directory"); + return -ENOTDIR; + +inval: + kerror("cull command requires dirfd and filename"); + return -EINVAL; + +} /* end cachefiles_proc_cull() */ + 
+/*****************************************************************************/ +/* + * set debugging mode + * - command: "debug " + */ +static int cachefiles_proc_debug(struct cachefiles_cache *cache, char *args) +{ + unsigned long mask; + + _enter(",%s", args); + + mask = simple_strtoul(args, &args, 0); + + if (!args || !isspace(*args)) + goto inval; + + cachefiles_debug = mask; + _leave(" = 0"); + return 0; + +inval: + kerror("debug command requires mask"); + return -EINVAL; + +} /* end cachefiles_proc_debug() */ diff --git a/fs/cachefiles/cf-sysctl.c b/fs/cachefiles/cf-sysctl.c new file mode 100644 index 0000000..4d6fc04 --- /dev/null +++ b/fs/cachefiles/cf-sysctl.c @@ -0,0 +1,69 @@ +/* cf-sysctl.c: Control parameters + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include "internal.h" + +static struct ctl_table_header *cachefiles_sysctl; + +/* + * Something that isn't CTL_ANY, CTL_NONE or a value that may clash. + * Use the same values as fs/nfs/sysctl.c + */ +#define CTL_UNNUMBERED -2 + +static ctl_table cachefiles_sysctl_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "debug", + .data = &cachefiles_debug, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax + }, + { .ctl_name = 0 } +}; + +static ctl_table cachefiles_sysctl_dir[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "cachefiles", + .maxlen = 0, + .mode = 0555, + .child = cachefiles_sysctl_table + }, + { .ctl_name = 0 } +}; + +static ctl_table cachefiles_sysctl_root[] = { + { + .ctl_name = CTL_FS, + .procname = "fs", + .mode = 0555, + .child = cachefiles_sysctl_dir, + }, + { .ctl_name = 0 } +}; + +int __init cachefiles_sysctl_init(void) +{ + cachefiles_sysctl = register_sysctl_table(cachefiles_sysctl_root, 0); + if (!cachefiles_sysctl) + return -ENOMEM; + return 0; +} + +void __exit cachefiles_sysctl_cleanup(void) +{ + unregister_sysctl_table(cachefiles_sysctl); +} diff --git a/fs/cachefiles/cf-xattr.c b/fs/cachefiles/cf-xattr.c new file mode 100644 index 0000000..a811667 --- /dev/null +++ b/fs/cachefiles/cf-xattr.c @@ -0,0 +1,299 @@ +/* cf-xattr.c: CacheFiles extended attribute management + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +static const char cachefiles_xattr_cache[] = XATTR_USER_PREFIX "CacheFiles.cache"; + +/*****************************************************************************/ +/* + * check the type label on an object + * - done using xattrs + */ +int cachefiles_check_object_type(struct cachefiles_object *object) +{ + struct dentry *dentry = object->dentry; + char type[3], xtype[3]; + int ret; + + ASSERT(dentry); + ASSERT(dentry->d_inode); + ASSERT(dentry->d_inode->i_op); + ASSERT(dentry->d_inode->i_op->setxattr); + ASSERT(dentry->d_inode->i_op->getxattr); + + if (!object->fscache.cookie) + strcpy(type, "C3"); + else + snprintf(type, 3, "%02x", object->fscache.cookie->def->type); + + _enter("%p{%s}", object, type); + + mutex_lock(&dentry->d_inode->i_mutex); + + /* attempt to install a type label directly */ + ret = dentry->d_inode->i_op->setxattr(dentry, cachefiles_xattr_cache, + type, 2, XATTR_CREATE); + if (ret == 0) { + _debug("SET"); + fsnotify_xattr(dentry); + mutex_unlock(&dentry->d_inode->i_mutex); + goto error; + } + + if (ret != -EEXIST) { + kerror("Can't set xattr on %*.*s [%lu] (err %d)", + dentry->d_name.len, dentry->d_name.len, + dentry->d_name.name, dentry->d_inode->i_ino, + -ret); + goto error; + } + + /* read the current type label */ + ret = dentry->d_inode->i_op->getxattr(dentry, cachefiles_xattr_cache, + xtype, 3); + if (ret < 0) { + if (ret == -ERANGE) + goto bad_type_length; + + kerror("Can't read xattr on %*.*s [%lu] (err %d)", + dentry->d_name.len, dentry->d_name.len, + dentry->d_name.name, dentry->d_inode->i_ino, + -ret); + goto error; + } + + /* check the type is what we're expecting */ + if (ret != 2) + goto bad_type_length; + + if (xtype[0] != type[0] || xtype[1] != type[1]) + goto bad_type; + + ret = 0; + +error: + mutex_unlock(&dentry->d_inode->i_mutex); + _leave(" = %d", ret); + return ret; + +bad_type_length: + kerror("Cache object %lu type xattr length incorrect", + dentry->d_inode->i_ino); + ret = -EIO; + goto error; + +bad_type: + xtype[2] = 0; + kerror("Cache object %*.*s [%lu] type %s not %s", + dentry->d_name.len, dentry->d_name.len, + dentry->d_name.name, dentry->d_inode->i_ino, + xtype, type); + ret = -EIO; + goto error; + +} /* end cachefiles_check_object_type() */ + +/*****************************************************************************/ +/* + * set the state xattr on a cache file + */ +int cachefiles_set_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata) +{ + struct dentry *dentry = object->dentry; + int ret; + + ASSERT(object->fscache.cookie); + ASSERT(dentry); + ASSERT(dentry->d_inode->i_op->setxattr); + + _enter("%p,#%d", object, auxdata->len); + + /* attempt to install the cache metadata directly */ + mutex_lock(&dentry->d_inode->i_mutex); + + _debug("SET %s #%u", + object->fscache.cookie->def->name, auxdata->len); + + ret = dentry->d_inode->i_op->setxattr(dentry, cachefiles_xattr_cache, + &auxdata->type, auxdata->len, + XATTR_CREATE); + if (ret == 0) + fsnotify_xattr(dentry); + else if (ret != -ENOMEM) + cachefiles_io_error_obj(object, + "Failed to set xattr with error %d", + ret); + + mutex_unlock(&dentry->d_inode->i_mutex); + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_set_object_xattr() */ + +/*****************************************************************************/ +/* + * check the state xattr on a cache file + * - return -ESTALE if the object should be deleted + */ +int 
cachefiles_check_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata) +{ + struct cachefiles_xattr *auxbuf; + struct dentry *dentry = object->dentry; + int ret; + + _enter("%p,#%d", object, auxdata->len); + + ASSERT(dentry); + ASSERT(dentry->d_inode); + ASSERT(dentry->d_inode->i_op->setxattr); + ASSERT(dentry->d_inode->i_op->getxattr); + + auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL); + if (!auxbuf) { + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + mutex_lock(&dentry->d_inode->i_mutex); + + /* read the current type label */ + ret = dentry->d_inode->i_op->getxattr(dentry, cachefiles_xattr_cache, + &auxbuf->type, 512 + 1); + if (ret < 0) { + if (ret == -ENODATA) + goto stale; /* no attribute - power went off + * mid-cull? */ + + if (ret == -ERANGE) + goto bad_type_length; + + cachefiles_io_error_obj(object, + "can't read xattr on %lu (err %d)", + dentry->d_inode->i_ino, -ret); + goto error; + } + + /* check the on-disk object */ + if (ret < 1) + goto bad_type_length; + + if (auxbuf->type != auxdata->type) + goto stale; + + auxbuf->len = ret; + + /* consult the netfs */ + if (object->fscache.cookie->def->check_aux) { + fscache_checkaux_t result; + unsigned int dlen; + + dlen = auxbuf->len - 1; + + _debug("checkaux %s #%u", + object->fscache.cookie->def->name, dlen); + + result = object->fscache.cookie->def->check_aux( + object->fscache.cookie->netfs_data, + &auxbuf->data, dlen); + + switch (result) { + /* entry okay as is */ + case FSCACHE_CHECKAUX_OKAY: + goto okay; + + /* entry requires update */ + case FSCACHE_CHECKAUX_NEEDS_UPDATE: + break; + + /* entry requires deletion */ + case FSCACHE_CHECKAUX_OBSOLETE: + goto stale; + + default: + BUG(); + } + + /* update the current label */ + ret = dentry->d_inode->i_op->setxattr(dentry, + cachefiles_xattr_cache, + &auxdata->type, + auxdata->len, + XATTR_REPLACE); + if (ret < 0) { + cachefiles_io_error_obj(object, + "Can't update xattr on %lu" + " (error %d)", + dentry->d_inode->i_ino, -ret); + goto error; + } + } + +okay: + ret = 0; + +error: + mutex_unlock(&dentry->d_inode->i_mutex); + kfree(auxbuf); + _leave(" = %d", ret); + return ret; + +bad_type_length: + kerror("Cache object %lu xattr length incorrect", + dentry->d_inode->i_ino); + ret = -EIO; + goto error; + +stale: + ret = -ESTALE; + goto error; + +} /* end cachefiles_check_object_xattr() */ + +/*****************************************************************************/ +/* + * remove the object's xattr to mark it stale + */ +int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, + struct dentry *dentry) +{ + int ret; + + mutex_lock(&dentry->d_inode->i_mutex); + + ret = dentry->d_inode->i_op->removexattr(dentry, + cachefiles_xattr_cache); + + mutex_unlock(&dentry->d_inode->i_mutex); + + if (ret < 0) { + if (ret == -ENOENT || ret == -ENODATA) + ret = 0; + else if (ret != -ENOMEM) + cachefiles_io_error(cache, + "Can't remove xattr from %lu" + " (error %d)", + dentry->d_inode->i_ino, -ret); + } + + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_remove_object_xattr() */ diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h new file mode 100644 index 0000000..9b4b76c --- /dev/null +++ b/fs/cachefiles/internal.h @@ -0,0 +1,308 @@ +/* internal.h: general netfs cache on cache files internal defs + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * + * CacheFiles layout: + * + * /..../CacheDir/ + * index + * 0/ + * 1/ + * 2/ + * index + * 0/ + * 1/ + * 2/ + * index + * 0 + * 1 + * 2 + */ + +#include +#include +#include +#include + +struct cachefiles_cache; +struct cachefiles_object; + +extern unsigned long cachefiles_debug; +#define CACHEFILES_DEBUG_KENTER 1 +#define CACHEFILES_DEBUG_KLEAVE 2 +#define CACHEFILES_DEBUG_KDEBUG 4 + +extern struct fscache_cache_ops cachefiles_cache_ops; +extern struct proc_dir_entry *cachefiles_proc; +extern struct file_operations cachefiles_proc_fops; + +/*****************************************************************************/ +/* + * node records + */ +struct cachefiles_object { + struct fscache_object fscache; /* fscache handle */ + struct dentry *dentry; /* the file/dir representing this object */ + struct file *backer; /* backing file */ + loff_t i_size; /* object size */ + atomic_t usage; /* basic object usage count */ + atomic_t fscache_usage; /* FSDEF object usage count */ + uint8_t type; /* object type */ + uint8_t new; /* T if object new */ + spinlock_t work_lock; + struct rw_semaphore sem; + struct work_struct read_work; /* read page copier */ + struct list_head read_list; /* pages to copy */ + struct list_head read_pend_list; /* pages to pending read from backer */ + struct work_struct write_work; /* page writer */ + struct list_head write_list; /* pages to store */ + struct rb_node active_node; /* link in active tree (dentry is key) */ +}; + +extern kmem_cache_t *cachefiles_object_jar; + +/*****************************************************************************/ +/* + * Cache files cache definition + */ +struct cachefiles_cache { + struct fscache_cache cache; /* FS-Cache record */ + struct vfsmount *mnt; /* mountpoint holding the cache */ + struct dentry *graveyard; /* directory into which dead objects go */ + struct file *cachefilesd; /* manager daemon handle */ + struct rb_root active_nodes; /* active nodes (can't be culled) */ + rwlock_t active_lock; /* lock for active_nodes */ + atomic_t gravecounter; /* graveyard uniquifier */ + unsigned brun_percent; /* when to stop culling (%) */ + unsigned bcull_percent; /* when to start culling (%) */ + unsigned bstop_percent; /* when to stop allocating (%) */ + unsigned bsize; /* cache's block size */ + unsigned bshift; /* min(log2 (PAGE_SIZE / bsize), 0) */ + sector_t brun; /* when to stop culling */ + sector_t bcull; /* when to start culling */ + sector_t bstop; /* when to stop allocating */ + unsigned long flags; +#define CACHEFILES_READY 0 /* T if cache prepared */ +#define CACHEFILES_DEAD 1 /* T if cache dead */ +#define CACHEFILES_CULLING 2 /* T if cull engaged */ + char *rootdirname; /* name of cache root directory */ + char *tag; /* cache binding tag */ +}; + +/*****************************************************************************/ +/* + * backing file read tracking + */ +struct cachefiles_one_read { + wait_queue_t monitor; /* link into monitored waitqueue */ + struct page *back_page; /* backing file page we're waiting for */ + struct page *netfs_page; /* netfs page we're going to fill */ + struct cachefiles_object *object; + struct list_head obj_link; /* link in object's lists */ + fscache_rw_complete_t 
end_io_func; + void *context; +}; + +/*****************************************************************************/ +/* + * backing file write tracking + */ +struct cachefiles_one_write { + struct page *netfs_page; /* netfs page to copy */ + struct cachefiles_object *object; + struct list_head obj_link; /* link in object's lists */ + fscache_rw_complete_t end_io_func; + void *context; +}; + +/*****************************************************************************/ +/* + * auxiliary data xattr buffer + */ +struct cachefiles_xattr { + uint16_t len; + uint8_t type; + uint8_t data[]; +}; + + +/* cf-bind.c */ +extern int cachefiles_proc_bind(struct cachefiles_cache *cache, char *args); +extern void cachefiles_proc_unbind(struct cachefiles_cache *cache); + +/* cf-interface.c */ +extern void cachefiles_read_copier_work(void *_object); +extern void cachefiles_write_work(void *_object); +extern int cachefiles_has_space(struct cachefiles_cache *cache, unsigned nr); + +/* cf-key.c */ +extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type); + +/* cf-namei.c */ +extern int cachefiles_delete_object(struct cachefiles_cache *cache, + struct cachefiles_object *object); +extern int cachefiles_walk_to_object(struct cachefiles_object *parent, + struct cachefiles_object *object, + char *key, + struct cachefiles_xattr *auxdata); +extern struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, + struct dentry *dir, + const char *name); + +extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, + char *filename); + +/* cf-sysctl.c */ +extern int __init cachefiles_sysctl_init(void); +extern void __exit cachefiles_sysctl_cleanup(void); + +/* cf-xattr.c */ +extern int cachefiles_check_object_type(struct cachefiles_object *object); +extern int cachefiles_set_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata); +extern int cachefiles_check_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata); +extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, + struct dentry *dentry); + + +/*****************************************************************************/ +/* + * error handling + */ +#define kerror(FMT,...) printk(KERN_ERR "CacheFiles: "FMT"\n" ,##__VA_ARGS__); + +#define cachefiles_io_error(___cache, FMT, ...) \ +do { \ + kerror("I/O Error: " FMT ,##__VA_ARGS__); \ + fscache_io_error(&(___cache)->cache); \ + set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ +} while(0) + +#define cachefiles_io_error_obj(object, FMT, ...) \ +do { \ + struct cachefiles_cache *___cache; \ + \ + ___cache = container_of((object)->fscache.cache, \ + struct cachefiles_cache, cache); \ + cachefiles_io_error(___cache, FMT ,##__VA_ARGS__); \ +} while(0) + + +/*****************************************************************************/ +/* + * debug tracing + */ +#define dbgprintk(FMT,...) \ + printk("[%-6.6s] "FMT"\n",current->comm ,##__VA_ARGS__) + +/* make sure we maintain the format strings, even when debugging is disabled */ +static inline void _dbprintk(const char *fmt, ...) + __attribute__((format(printf,1,2))); +static inline void _dbprintk(const char *fmt, ...) +{ +} + +#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) +#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) +#define kdebug(FMT,...) dbgprintk(FMT ,##__VA_ARGS__) + + +#if defined(__KDEBUG) +#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__) +#define _leave(FMT,...) 
kleave(FMT,##__VA_ARGS__) +#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__) + +#elif defined(CONFIG_CACHEFILES_DEBUG) +#define _enter(FMT,...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KENTER) \ + kenter(FMT,##__VA_ARGS__); \ +} while (0) + +#define _leave(FMT,...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KLEAVE) \ + kleave(FMT,##__VA_ARGS__); \ +} while (0) + +#define _debug(FMT,...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KDEBUG) \ + kdebug(FMT,##__VA_ARGS__); \ +} while (0) + +#else +#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) +#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) +#define _debug(FMT,...) _dbprintk(FMT ,##__VA_ARGS__) +#endif + +#if 1 // defined(__KDEBUGALL) + +#define ASSERT(X) \ +do { \ + if (unlikely(!(X))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + BUG(); \ + } \ +} while(0) + +#define ASSERTCMP(X, OP, Y) \ +do { \ + if (unlikely(!((X) OP (Y)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while(0) + +#define ASSERTIF(C, X) \ +do { \ + if (unlikely((C) && !(X))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + BUG(); \ + } \ +} while(0) + +#define ASSERTIFCMP(C, X, OP, Y) \ +do { \ + if (unlikely((C) && !((X) OP (Y)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while(0) + +#else + +#define ASSERT(X) \ +do { \ +} while(0) + +#define ASSERTCMP(X, OP, Y) \ +do { \ +} while(0) + +#define ASSERTIF(C, X) \ +do { \ +} while(0) + +#define ASSERTIFCMP(C, X, OP, Y) \ +do { \ +} while(0) + +#endif diff --git a/fs/dcache.c b/fs/dcache.c index 1b4a3a3..780f014 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -547,6 +547,139 @@ repeat: } /* + * destroy a single subtree of dentries for unmount + * - see the comments on shrink_dcache_for_umount() for a description of the + * locking + */ +static void shrink_dcache_for_umount_subtree(struct dentry *dentry) +{ + struct dentry *parent; + + BUG_ON(!IS_ROOT(dentry)); + + /* detach this root from the system */ + spin_lock(&dcache_lock); + if (!list_empty(&dentry->d_lru)) { + dentry_stat.nr_unused--; + list_del_init(&dentry->d_lru); + } + __d_drop(dentry); + spin_unlock(&dcache_lock); + + for (;;) { + /* descend to the first leaf in the current subtree */ + while (!list_empty(&dentry->d_subdirs)) { + struct dentry *loop; + + /* this is a branch with children - detach all of them + * from the system in one go */ + spin_lock(&dcache_lock); + list_for_each_entry(loop, &dentry->d_subdirs, + d_u.d_child) { + if (!list_empty(&loop->d_lru)) { + dentry_stat.nr_unused--; + list_del_init(&loop->d_lru); + } + + __d_drop(loop); + cond_resched_lock(&dcache_lock); + } + spin_unlock(&dcache_lock); + + /* move to the first child */ + dentry = list_entry(dentry->d_subdirs.next, + struct dentry, d_u.d_child); + } + + /* consume the dentries from this leaf up through its parents + * until we find one with children or run out altogether */ + do { + struct inode *inode; + + if (atomic_read(&dentry->d_count) != 0) { + printk(KERN_ERR + "BUG: Dentry %p{i=%lx,n=%s}" + " still in use (%d)" + " [unmount of %s %s]\n", + dentry, + dentry->d_inode ? 
+ dentry->d_inode->i_ino : 0UL, + dentry->d_name.name, + atomic_read(&dentry->d_count), + dentry->d_sb->s_type->name, + dentry->d_sb->s_id); + BUG(); + } + + parent = dentry->d_parent; + if (parent == dentry) + parent = NULL; + else + atomic_dec(&parent->d_count); + + list_del(&dentry->d_u.d_child); + dentry_stat.nr_dentry--; /* For d_free, below */ + + inode = dentry->d_inode; + if (inode) { +#ifdef CONFIG_INOTIFY + BUG_ON(!list_empty(&inode->inotify_watches)); +#endif + dentry->d_inode = NULL; + list_del_init(&dentry->d_alias); + if (dentry->d_op && dentry->d_op->d_iput) + dentry->d_op->d_iput(dentry, inode); + else + iput(inode); + } + + d_free(dentry); + + /* finished when we fall off the top of the tree, + * otherwise we ascend to the parent and move to the + * next sibling if there is one */ + if (!parent) + return; + + dentry = parent; + + } while (list_empty(&dentry->d_subdirs)); + + dentry = list_entry(dentry->d_subdirs.next, + struct dentry, d_u.d_child); + } +} + +/* + * destroy the dentries attached to a superblock on unmounting + * - we don't need to use dentry->d_lock, and only need dcache_lock when + * removing the dentry from the system lists and hashes because: + * - the superblock is detached from all mountings and open files, so the + * dentry trees will not be rearranged by the VFS + * - s_umount is write-locked, so the memory pressure shrinker will ignore + * any dentries belonging to this superblock that it comes across + * - the filesystem itself is no longer permitted to rearrange the dentries + * in this superblock + */ +void shrink_dcache_for_umount(struct super_block *sb) +{ + struct dentry *dentry; + + if (down_read_trylock(&sb->s_umount)) + BUG(); + + dentry = sb->s_root; + sb->s_root = NULL; + atomic_dec(&dentry->d_count); + shrink_dcache_for_umount_subtree(dentry); + + while (!hlist_empty(&sb->s_anon)) { + dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); + shrink_dcache_for_umount_subtree(dentry); + } +} + +/* * Search for at least 1 mount point in the dentry's subdirs. * We descend to the next level whenever the d_subdirs * list is non-empty and continue searching. @@ -828,17 +961,19 @@ void d_instantiate(struct dentry *entry, * (or otherwise set) by the caller to indicate that it is now * in use by the dcache. 
*/ -struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) +static struct dentry *__d_instantiate_unique(struct dentry *entry, + struct inode *inode) { struct dentry *alias; int len = entry->d_name.len; const char *name = entry->d_name.name; unsigned int hash = entry->d_name.hash; - BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); - if (!inode) - goto do_negative; + if (!inode) { + entry->d_inode = NULL; + return NULL; + } + list_for_each_entry(alias, &inode->i_dentry, d_alias) { struct qstr *qstr = &alias->d_name; @@ -851,19 +986,35 @@ struct dentry *d_instantiate_unique(stru if (memcmp(qstr->name, name, len)) continue; dget_locked(alias); - spin_unlock(&dcache_lock); - BUG_ON(!d_unhashed(alias)); - iput(inode); return alias; } + list_add(&entry->d_alias, &inode->i_dentry); -do_negative: entry->d_inode = inode; fsnotify_d_instantiate(entry, inode); - spin_unlock(&dcache_lock); - security_d_instantiate(entry, inode); return NULL; } + +struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) +{ + struct dentry *result; + + BUG_ON(!list_empty(&entry->d_alias)); + + spin_lock(&dcache_lock); + result = __d_instantiate_unique(entry, inode); + spin_unlock(&dcache_lock); + + if (!result) { + security_d_instantiate(entry, inode); + return NULL; + } + + BUG_ON(!d_unhashed(result)); + iput(inode); + return result; +} + EXPORT_SYMBOL(d_instantiate_unique); /** @@ -1235,6 +1386,11 @@ static void __d_rehash(struct dentry * e hlist_add_head_rcu(&entry->d_hash, list); } +static void _d_rehash(struct dentry * entry) +{ + __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash)); +} + /** * d_rehash - add an entry back to the hash * @entry: dentry to add to the hash @@ -1244,11 +1400,9 @@ static void __d_rehash(struct dentry * e void d_rehash(struct dentry * entry) { - struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash); - spin_lock(&dcache_lock); spin_lock(&entry->d_lock); - __d_rehash(entry, list); + _d_rehash(entry); spin_unlock(&entry->d_lock); spin_unlock(&dcache_lock); } @@ -1386,6 +1540,120 @@ already_unhashed: spin_unlock(&dcache_lock); } +/* + * Prepare an anonymous dentry for life in the superblock's dentry tree as a + * named dentry in place of the dentry to be replaced. + */ +static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) +{ + struct dentry *dparent, *aparent; + + switch_names(dentry, anon); + do_switch(dentry->d_name.len, anon->d_name.len); + do_switch(dentry->d_name.hash, anon->d_name.hash); + + dparent = dentry->d_parent; + aparent = anon->d_parent; + + dentry->d_parent = (aparent == anon) ? dentry : aparent; + list_del(&dentry->d_u.d_child); + if (!IS_ROOT(dentry)) + list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + else + INIT_LIST_HEAD(&dentry->d_u.d_child); + + anon->d_parent = (dparent == dentry) ? 
anon : dparent; + list_del(&anon->d_u.d_child); + if (!IS_ROOT(anon)) + list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs); + else + INIT_LIST_HEAD(&anon->d_u.d_child); + + anon->d_flags &= ~DCACHE_DISCONNECTED; +} + +/** + * d_materialise_unique - introduce an inode into the tree + * @dentry: candidate dentry + * @inode: inode to bind to the dentry, to which aliases may be attached + * + * Introduces an dentry into the tree, substituting an extant disconnected + * root directory alias in its place if there is one + */ +struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) +{ + struct dentry *alias, *actual; + + BUG_ON(!d_unhashed(dentry)); + + spin_lock(&dcache_lock); + + if (!inode) { + actual = dentry; + dentry->d_inode = NULL; + goto found_lock; + } + + /* See if a disconnected directory already exists as an anonymous root + * that we should splice into the tree instead */ + if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) { + spin_lock(&alias->d_lock); + + /* Is this a mountpoint that we could splice into our tree? */ + if (IS_ROOT(alias)) + goto connect_mountpoint; + + if (alias->d_name.len == dentry->d_name.len && + alias->d_parent == dentry->d_parent && + memcmp(alias->d_name.name, + dentry->d_name.name, + dentry->d_name.len) == 0) + goto replace_with_alias; + + spin_unlock(&alias->d_lock); + + /* Doh! Seem to be aliasing directories for some reason... */ + dput(alias); + } + + /* Add a unique reference */ + actual = __d_instantiate_unique(dentry, inode); + if (!actual) + actual = dentry; + else if (unlikely(!d_unhashed(actual))) + goto shouldnt_be_hashed; + +found_lock: + spin_lock(&actual->d_lock); +found: + _d_rehash(actual); + spin_unlock(&actual->d_lock); + spin_unlock(&dcache_lock); + + if (actual == dentry) { + security_d_instantiate(dentry, inode); + return NULL; + } + + iput(inode); + return actual; + + /* Convert the anonymous/root alias into an ordinary dentry */ +connect_mountpoint: + __d_materialise_dentry(dentry, alias); + + /* Replace the candidate dentry with the alias in the tree */ +replace_with_alias: + __d_drop(alias); + actual = alias; + goto found; + +shouldnt_be_hashed: + spin_unlock(&dcache_lock); + BUG(); + goto shouldnt_be_hashed; +} + /** * d_path - return the path of a dentry * @dentry: dentry to report @@ -1784,6 +2052,7 @@ EXPORT_SYMBOL(d_instantiate); EXPORT_SYMBOL(d_invalidate); EXPORT_SYMBOL(d_lookup); EXPORT_SYMBOL(d_move); +EXPORT_SYMBOL_GPL(d_materialise_unique); EXPORT_SYMBOL(d_path); EXPORT_SYMBOL(d_prune_aliases); EXPORT_SYMBOL(d_rehash); diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 92ea826..b0e7d9f 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -112,7 +112,7 @@ static void ext2_check_page(struct page if (offs != limit) goto Eend; out: - SetPageChecked(page); + SetPageFsMisc(page); return; /* Too bad, we had an error */ @@ -152,7 +152,7 @@ Eend: dir->i_ino, (page->index<inode)); fail: - SetPageChecked(page); + SetPageFsMisc(page); SetPageError(page); } @@ -165,7 +165,7 @@ static struct page * ext2_get_page(struc kmap(page); if (!PageUptodate(page)) goto fail; - if (!PageChecked(page)) + if (!PageFsMisc(page)) ext2_check_page(page); if (PageError(page)) goto fail; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index f804d5e..8073fc9 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1527,12 +1527,12 @@ static int ext3_journalled_writepage(str goto no_write; } - if (!page_has_buffers(page) || PageChecked(page)) { + if (!page_has_buffers(page) || PageFsMisc(page)) { /* * It's mmapped 
pagecache. Add buffers and journal it. There * doesn't seem much point in redirtying the page here. */ - ClearPageChecked(page); + ClearPageFsMisc(page); ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, ext3_get_block); if (ret != 0) { @@ -1589,7 +1589,7 @@ static void ext3_invalidatepage(struct p * If it's a full truncate we just forget about the pending dirtying */ if (offset == 0) - ClearPageChecked(page); + ClearPageFsMisc(page); journal_invalidatepage(journal, page, offset); } @@ -1598,7 +1598,7 @@ static int ext3_releasepage(struct page { journal_t *journal = EXT3_JOURNAL(page->mapping->host); - WARN_ON(PageChecked(page)); + WARN_ON(PageFsMisc(page)); if (!page_has_buffers(page)) return 0; return journal_try_to_free_buffers(journal, page, wait); @@ -1694,7 +1694,7 @@ out: */ static int ext3_journalled_set_page_dirty(struct page *page) { - SetPageChecked(page); + SetPageFsMisc(page); return __set_page_dirty_nobuffers(page); } diff --git a/fs/fcntl.c b/fs/fcntl.c index d35cbc6..b43d821 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -529,6 +529,8 @@ int send_sigurg(struct fown_struct *fown return ret; } +EXPORT_SYMBOL(send_sigurg); + static DEFINE_RWLOCK(fasync_lock); static kmem_cache_t *fasync_cache __read_mostly; diff --git a/fs/file_table.c b/fs/file_table.c index 0131ba0..6861eb9 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -29,10 +29,13 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; +struct files_kernel_stat_struct files_kernel_stat; + /* public. Not pretty! */ __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); static struct percpu_counter nr_files __cacheline_aligned_in_smp; +static atomic_t nr_kernel_files; static inline void file_free_rcu(struct rcu_head *head) { @@ -42,7 +45,10 @@ static inline void file_free_rcu(struct static inline void file_free(struct file *f) { - percpu_counter_dec(&nr_files); + if (f->f_kernel_flags & FKFLAGS_NO_ENFILE) + atomic_dec(&nr_kernel_files); + else + percpu_counter_dec(&nr_files); call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); } @@ -73,45 +79,64 @@ int proc_nr_files(ctl_table *table, int files_stat.nr_files = get_nr_files(); return proc_dointvec(table, write, filp, buffer, lenp, ppos); } +int proc_files_kernel(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + files_kernel_stat.nr_kernel_files = atomic_read(&nr_kernel_files); + return proc_dointvec(table, write, filp, buffer, lenp, ppos); +} #else int proc_nr_files(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } +int proc_files_kernel(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} #endif /* Find an unused file structure and return a pointer to it. * Returns NULL, if there are no more free file structures or * we run out of memory. */ -struct file *get_empty_filp(void) +struct file *get_empty_kernel_filp(unsigned short kflags) { struct task_struct *tsk; static int old_max; struct file * f; /* - * Privileged users can go above max_files + * Privileged users can go above max_files and internal kernel users + * can avoid it completely */ - if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) { + if (!(kflags & FKFLAGS_NO_ENFILE) && + get_nr_files() >= files_stat.max_files && + !capable(CAP_SYS_ADMIN) + ) { /* - * percpu_counters are inaccurate. Do an expensive check before - * we go and fail. + * percpu_counters are inaccurate. 
Do an expensive + * check before we go and fail. */ if (percpu_counter_sum(&nr_files) >= files_stat.max_files) goto over; } - f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); if (f == NULL) goto fail; - percpu_counter_inc(&nr_files); - memset(f, 0, sizeof(*f)); + if (kflags & FKFLAGS_NO_ENFILE) + atomic_inc(&nr_kernel_files); + else + percpu_counter_inc(&nr_files); + if (security_file_alloc(f)) goto fail_sec; + f->f_kernel_flags = kflags; tsk = current; INIT_LIST_HEAD(&f->f_u.fu_list); atomic_set(&f->f_count, 1); @@ -137,6 +162,11 @@ fail: return NULL; } +struct file *get_empty_filp(void) +{ + return get_empty_kernel_filp(0); +} + EXPORT_SYMBOL(get_empty_filp); void fastcall fput(struct file *file) @@ -234,6 +264,7 @@ struct file fastcall *fget_light(unsigne return file; } +EXPORT_SYMBOL_GPL(fget_light); void put_filp(struct file *file) { diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c index decac62..805bbb2 100644 --- a/fs/freevxfs/vxfs_subr.c +++ b/fs/freevxfs/vxfs_subr.c @@ -78,7 +78,7 @@ vxfs_get_page(struct address_space *mapp kmap(pp); if (!PageUptodate(pp)) goto fail; - /** if (!PageChecked(pp)) **/ + /** if (!PageFsMisc(pp)) **/ /** vxfs_check_page(pp); **/ if (PageError(pp)) goto fail; diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile new file mode 100644 index 0000000..10f17a3 --- /dev/null +++ b/fs/fscache/Makefile @@ -0,0 +1,11 @@ +# +# Makefile for general filesystem caching code +# + +fscache-objs := \ + cookie.o \ + fsdef.o \ + main.o \ + page.o + +obj-$(CONFIG_FSCACHE) := fscache.o diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c new file mode 100644 index 0000000..ea017b7 --- /dev/null +++ b/fs/fscache/cookie.c @@ -0,0 +1,1063 @@ +/* cookie.c: general filesystem cache cookie management + * + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include "fscache-int.h" + +static LIST_HEAD(fscache_cache_tag_list); +static LIST_HEAD(fscache_cache_list); +static LIST_HEAD(fscache_netfs_list); +static DECLARE_RWSEM(fscache_addremove_sem); +static struct fscache_cache_tag fscache_nomem_tag; + +kmem_cache_t *fscache_cookie_jar; + +static void fscache_withdraw_object(struct fscache_cache *cache, + struct fscache_object *object); + +static void __fscache_cookie_put(struct fscache_cookie *cookie); + +static inline void fscache_cookie_put(struct fscache_cookie *cookie) +{ + /* check to see whether the cookie has already been released by looking + * for the poison when slab debugging is on */ +#ifdef CONFIG_DEBUG_SLAB + BUG_ON((atomic_read(&cookie->usage) & 0xffff0000) == 0x6b6b0000); +#endif + + BUG_ON(atomic_read(&cookie->usage) <= 0); + + if (atomic_dec_and_test(&cookie->usage)) + __fscache_cookie_put(cookie); + +} + +/*****************************************************************************/ +/* + * look up a cache tag + */ +struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *name) +{ + struct fscache_cache_tag *tag, *xtag; + + /* firstly check for the existence of the tag under read lock */ + down_read(&fscache_addremove_sem); + + list_for_each_entry(tag, &fscache_cache_tag_list, link) { + if (strcmp(tag->name, name) == 0) { + atomic_inc(&tag->usage); + up_read(&fscache_addremove_sem); + return tag; + } + } + + up_read(&fscache_addremove_sem); + + /* the tag does not exist - create a candidate */ + xtag = kmalloc(sizeof(*xtag) + strlen(name) + 1, GFP_KERNEL); + if (!xtag) + /* return a dummy tag if out of memory */ + return &fscache_nomem_tag; + + atomic_set(&xtag->usage, 1); + strcpy(xtag->name, name); + + /* write lock, search again and add if still not present */ + down_write(&fscache_addremove_sem); + + list_for_each_entry(tag, &fscache_cache_tag_list, link) { + if (strcmp(tag->name, name) == 0) { + atomic_inc(&tag->usage); + up_write(&fscache_addremove_sem); + kfree(xtag); + return tag; + } + } + + list_add_tail(&xtag->link, &fscache_cache_tag_list); + up_write(&fscache_addremove_sem); + return xtag; + +} /* end __fscache_lookup_cache_tag() */ + +/*****************************************************************************/ +/* + * release a reference to a cache tag + */ +void __fscache_release_cache_tag(struct fscache_cache_tag *tag) +{ + if (tag != &fscache_nomem_tag) { + down_write(&fscache_addremove_sem); + + if (atomic_dec_and_test(&tag->usage)) + list_del_init(&tag->link); + else + tag = NULL; + + up_write(&fscache_addremove_sem); + + kfree(tag); + } + +} /* end __fscache_release_cache_tag() */ + +/*****************************************************************************/ +/* + * register a network filesystem for caching + */ +int __fscache_register_netfs(struct fscache_netfs *netfs) +{ + struct fscache_netfs *ptr; + int ret; + + _enter("{%s}", netfs->name); + + INIT_LIST_HEAD(&netfs->link); + + /* allocate a cookie for the primary index */ + netfs->primary_index = + kmem_cache_zalloc(fscache_cookie_jar, SLAB_KERNEL); + + if (!netfs->primary_index) { + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + /* initialise the primary index cookie */ + atomic_set(&netfs->primary_index->usage, 1); + atomic_set(&netfs->primary_index->children, 0); + + netfs->primary_index->def = &fscache_fsdef_netfs_def; + netfs->primary_index->parent = &fscache_fsdef_index; + netfs->primary_index->netfs = netfs; + netfs->primary_index->netfs_data = netfs; + + atomic_inc(&netfs->primary_index->parent->usage); + 
atomic_inc(&netfs->primary_index->parent->children); + + init_rwsem(&netfs->primary_index->sem); + INIT_HLIST_HEAD(&netfs->primary_index->backing_objects); + + /* check the netfs type is not already present */ + down_write(&fscache_addremove_sem); + + ret = -EEXIST; + list_for_each_entry(ptr, &fscache_netfs_list, link) { + if (strcmp(ptr->name, netfs->name) == 0) + goto already_registered; + } + + list_add(&netfs->link, &fscache_netfs_list); + ret = 0; + + printk("FS-Cache: netfs '%s' registered for caching\n", netfs->name); + +already_registered: + up_write(&fscache_addremove_sem); + + if (ret < 0) { + netfs->primary_index->parent = NULL; + __fscache_cookie_put(netfs->primary_index); + netfs->primary_index = NULL; + } + + _leave(" = %d", ret); + return ret; + +} /* end __fscache_register_netfs() */ + +EXPORT_SYMBOL(__fscache_register_netfs); + +/*****************************************************************************/ +/* + * unregister a network filesystem from the cache + * - all cookies must have been released first + */ +void __fscache_unregister_netfs(struct fscache_netfs *netfs) +{ + _enter("{%s.%u}", netfs->name, netfs->version); + + down_write(&fscache_addremove_sem); + + list_del(&netfs->link); + fscache_relinquish_cookie(netfs->primary_index, 0); + + up_write(&fscache_addremove_sem); + + printk("FS-Cache: netfs '%s' unregistered from caching\n", + netfs->name); + + _leave(""); + +} /* end __fscache_unregister_netfs() */ + +EXPORT_SYMBOL(__fscache_unregister_netfs); + +/*****************************************************************************/ +/* + * initialise a cache record + */ +void fscache_init_cache(struct fscache_cache *cache, + struct fscache_cache_ops *ops, + const char *idfmt, + ...) +{ + va_list va; + + memset(cache, 0, sizeof(*cache)); + + cache->ops = ops; + + va_start(va, idfmt); + vsnprintf(cache->identifier, sizeof(cache->identifier), idfmt, va); + va_end(va); + + INIT_LIST_HEAD(&cache->link); + INIT_LIST_HEAD(&cache->object_list); + spin_lock_init(&cache->object_list_lock); + init_rwsem(&cache->withdrawal_sem); + +} /* end fscache_init_cache() */ + +EXPORT_SYMBOL(fscache_init_cache); + +/*****************************************************************************/ +/* + * declare a mounted cache as being open for business + */ +int fscache_add_cache(struct fscache_cache *cache, + struct fscache_object *ifsdef, + const char *tagname) +{ + struct fscache_cache_tag *tag; + + BUG_ON(!cache->ops); + BUG_ON(!ifsdef); + + cache->flags = 0; + + if (!tagname) + tagname = cache->identifier; + + BUG_ON(!tagname[0]); + + _enter("{%s.%s},,%s", cache->ops->name, cache->identifier, tagname); + + if (!cache->ops->grab_object(ifsdef)) + BUG(); + + ifsdef->cookie = &fscache_fsdef_index; + ifsdef->cache = cache; + cache->fsdef = ifsdef; + + down_write(&fscache_addremove_sem); + + /* instantiate or allocate a cache tag */ + list_for_each_entry(tag, &fscache_cache_tag_list, link) { + if (strcmp(tag->name, tagname) == 0) { + if (tag->cache) { + printk(KERN_ERR + "FS-Cache: cache tag '%s' already in use\n", + tagname); + up_write(&fscache_addremove_sem); + return -EEXIST; + } + + atomic_inc(&tag->usage); + goto found_cache_tag; + } + } + + tag = kmalloc(sizeof(*tag) + strlen(tagname) + 1, GFP_KERNEL); + if (!tag) { + up_write(&fscache_addremove_sem); + return -ENOMEM; + } + + atomic_set(&tag->usage, 1); + strcpy(tag->name, tagname); + list_add_tail(&tag->link, &fscache_cache_tag_list); + +found_cache_tag: + tag->cache = cache; + cache->tag = tag; + + /* add the cache to the 
list */ + list_add(&cache->link, &fscache_cache_list); + + /* add the cache's netfs definition index object to the cache's + * list */ + spin_lock(&cache->object_list_lock); + list_add_tail(&ifsdef->cache_link, &cache->object_list); + spin_unlock(&cache->object_list_lock); + + /* add the cache's netfs definition index object to the top level index + * cookie as a known backing object */ + down_write(&fscache_fsdef_index.sem); + + hlist_add_head(&ifsdef->cookie_link, + &fscache_fsdef_index.backing_objects); + + atomic_inc(&fscache_fsdef_index.usage); + + /* done */ + up_write(&fscache_fsdef_index.sem); + up_write(&fscache_addremove_sem); + + printk(KERN_NOTICE + "FS-Cache: Cache \"%s\" added (type %s)\n", + cache->tag->name, cache->ops->name); + + _leave(" = 0 [%s]", cache->identifier); + return 0; + +} /* end fscache_add_cache() */ + +EXPORT_SYMBOL(fscache_add_cache); + +/*****************************************************************************/ +/* + * note a cache I/O error + */ +void fscache_io_error(struct fscache_cache *cache) +{ + set_bit(FSCACHE_IOERROR, &cache->flags); + + printk(KERN_ERR "FS-Cache: Cache %s stopped due to I/O error\n", + cache->ops->name); + +} /* end fscache_io_error() */ + +EXPORT_SYMBOL(fscache_io_error); + +/*****************************************************************************/ +/* + * withdraw an unmounted cache from the active service + */ +void fscache_withdraw_cache(struct fscache_cache *cache) +{ + struct fscache_object *object; + + _enter(""); + + printk(KERN_NOTICE + "FS-Cache: Withdrawing cache \"%s\"\n", + cache->tag->name); + + /* make the cache unavailable for cookie acquisition */ + down_write(&cache->withdrawal_sem); + + down_write(&fscache_addremove_sem); + list_del_init(&cache->link); + cache->tag->cache = NULL; + up_write(&fscache_addremove_sem); + + /* mark all objects as being withdrawn */ + spin_lock(&cache->object_list_lock); + list_for_each_entry(object, &cache->object_list, cache_link) { + set_bit(FSCACHE_OBJECT_WITHDRAWN, &object->flags); + } + spin_unlock(&cache->object_list_lock); + + /* make sure all pages pinned by operations on behalf of the netfs are + * written to disc */ + cache->ops->sync_cache(cache); + + /* dissociate all the netfs pages backed by this cache from the block + * mappings in the cache */ + cache->ops->dissociate_pages(cache); + + /* we now have to destroy all the active objects pertaining to this + * cache */ + spin_lock(&cache->object_list_lock); + + while (!list_empty(&cache->object_list)) { + object = list_entry(cache->object_list.next, + struct fscache_object, cache_link); + list_del_init(&object->cache_link); + spin_unlock(&cache->object_list_lock); + + _debug("withdraw %p", object->cookie); + + /* we've extracted an active object from the tree - now dispose + * of it */ + fscache_withdraw_object(cache, object); + + spin_lock(&cache->object_list_lock); + } + + spin_unlock(&cache->object_list_lock); + + fscache_release_cache_tag(cache->tag); + cache->tag = NULL; + + _leave(""); + +} /* end fscache_withdraw_cache() */ + +EXPORT_SYMBOL(fscache_withdraw_cache); + +/*****************************************************************************/ +/* + * withdraw an object from active service at the behest of the cache + * - need break the links to a cached object cookie + * - called under two situations: + * (1) recycler decides to reclaim an in-use object + * (2) a cache is unmounted + * - have to take care as the cookie can be being relinquished by the netfs + * simultaneously + * - the active object 
is pinned by the caller holding a refcount on it + */ +static void fscache_withdraw_object(struct fscache_cache *cache, + struct fscache_object *object) +{ + struct fscache_cookie *cookie, *xcookie = NULL; + + _enter(",%p", object); + + /* first of all we have to break the links between the object and the + * cookie + * - we have to hold both semaphores BUT we have to get the cookie sem + * FIRST + */ + cache->ops->lock_object(object); + + cookie = object->cookie; + if (cookie) { + /* pin the cookie so that is doesn't escape */ + atomic_inc(&cookie->usage); + + /* re-order the locks to avoid deadlock */ + cache->ops->unlock_object(object); + down_write(&cookie->sem); + cache->ops->lock_object(object); + + /* erase references from the object to the cookie */ + hlist_del_init(&object->cookie_link); + + xcookie = object->cookie; + object->cookie = NULL; + + up_write(&cookie->sem); + } + + cache->ops->unlock_object(object); + + /* we've broken the links between cookie and object */ + if (xcookie) { + fscache_cookie_put(xcookie); + cache->ops->put_object(object); + } + + /* unpin the cookie */ + if (cookie) { + if (cookie->def && cookie->def->now_uncached) + cookie->def->now_uncached(cookie->netfs_data); + fscache_cookie_put(cookie); + } + + _leave(""); + +} /* end fscache_withdraw_object() */ + +/*****************************************************************************/ +/* + * select a cache on which to store an object + * - the cache addremove semaphore must be at least read-locked by the caller + * - the object will never be an index + */ +static struct fscache_cache *fscache_select_cache_for_object(struct fscache_cookie *cookie) +{ + struct fscache_cache_tag *tag; + struct fscache_object *object; + struct fscache_cache *cache; + + _enter(""); + + if (list_empty(&fscache_cache_list)) { + _leave(" = NULL [no cache]"); + return NULL; + } + + /* we check the parent to determine the cache to use */ + down_read(&cookie->parent->sem); + + /* the first in the parent's backing list should be the preferred + * cache */ + if (!hlist_empty(&cookie->parent->backing_objects)) { + object = hlist_entry(cookie->parent->backing_objects.first, + struct fscache_object, cookie_link); + + cache = object->cache; + if (test_bit(FSCACHE_IOERROR, &cache->flags)) + cache = NULL; + + up_read(&cookie->parent->sem); + _leave(" = %p [parent]", cache); + return cache; + } + + /* the parent is unbacked */ + if (cookie->parent->def->type != FSCACHE_COOKIE_TYPE_INDEX) { + /* parent not an index and is unbacked */ + up_read(&cookie->parent->sem); + _leave(" = NULL [parent ubni]"); + return NULL; + } + + up_read(&cookie->parent->sem); + + if (!cookie->parent->def->select_cache) + goto no_preference; + + /* ask the netfs for its preference */ + tag = cookie->parent->def->select_cache( + cookie->parent->parent->netfs_data, + cookie->parent->netfs_data); + + if (!tag) + goto no_preference; + + if (tag == &fscache_nomem_tag) { + _leave(" = NULL [nomem tag]"); + return NULL; + } + + if (!tag->cache) { + _leave(" = NULL [unbacked tag]"); + return NULL; + } + + if (test_bit(FSCACHE_IOERROR, &tag->cache->flags)) + return NULL; + + _leave(" = %p [specific]", tag->cache); + return tag->cache; + +no_preference: + /* netfs has no preference - just select first cache */ + cache = list_entry(fscache_cache_list.next, + struct fscache_cache, link); + _leave(" = %p [first]", cache); + return cache; + +} /* end fscache_select_cache_for_object() */ + +/*****************************************************************************/ +/* + * 
get a backing object for a cookie from the chosen cache + * - the cookie must be write-locked by the caller + * - all parent indexes will be obtained recursively first + */ +static struct fscache_object *fscache_lookup_object(struct fscache_cookie *cookie, + struct fscache_cache *cache) +{ + struct fscache_cookie *parent = cookie->parent; + struct fscache_object *pobject, *object; + struct hlist_node *_p; + + _enter("{%s/%s},", + parent && parent->def ? parent->def->name : "", + cookie->def ? (char *) cookie->def->name : ""); + + if (test_bit(FSCACHE_IOERROR, &cache->flags)) + return NULL; + + /* see if we have the backing object for this cookie + cache immediately + * to hand + */ + object = NULL; + hlist_for_each_entry(object, _p, + &cookie->backing_objects, cookie_link + ) { + if (object->cache == cache) + break; + } + + if (object) { + _leave(" = %p [old]", object); + return object; + } + + BUG_ON(!parent); /* FSDEF entries don't have a parent */ + + /* we don't have a backing cookie, so we need to consult the object's + * parent index in the selected cache and maybe insert an entry + * therein; so the first thing to do is make sure that the parent index + * is represented on disc + */ + down_read(&parent->sem); + + pobject = NULL; + hlist_for_each_entry(pobject, _p, + &parent->backing_objects, cookie_link + ) { + if (pobject->cache == cache) + break; + } + + if (!pobject) { + /* we don't know about the parent object */ + up_read(&parent->sem); + down_write(&parent->sem); + + pobject = fscache_lookup_object(parent, cache); + if (IS_ERR(pobject)) { + up_write(&parent->sem); + _leave(" = %ld [no ipobj]", PTR_ERR(pobject)); + return pobject; + } + + _debug("pobject=%p", pobject); + + BUG_ON(pobject->cookie != parent); + + downgrade_write(&parent->sem); + } + + /* now we can attempt to look up this object in the parent, possibly + * creating a representation on disc when we do so + */ + object = cache->ops->lookup_object(cache, pobject, cookie); + up_read(&parent->sem); + + if (IS_ERR(object)) { + _leave(" = %ld [no obj]", PTR_ERR(object)); + return object; + } + + /* keep track of it */ + cache->ops->lock_object(object); + + BUG_ON(!hlist_unhashed(&object->cookie_link)); + + /* attach to the cache's object list */ + if (list_empty(&object->cache_link)) { + spin_lock(&cache->object_list_lock); + list_add(&object->cache_link, &cache->object_list); + spin_unlock(&cache->object_list_lock); + } + + /* attach to the cookie */ + object->cookie = cookie; + atomic_inc(&cookie->usage); + hlist_add_head(&object->cookie_link, &cookie->backing_objects); + + /* done */ + cache->ops->unlock_object(object); + _leave(" = %p [new]", object); + return object; + +} /* end fscache_lookup_object() */ + +/*****************************************************************************/ +/* + * request a cookie to represent an object (index, datafile, xattr, etc) + * - parent specifies the parent object + * - the top level index cookie for each netfs is stored in the fscache_netfs + * struct upon registration + * - idef points to the definition + * - the netfs_data will be passed to the functions pointed to in *def + * - all attached caches will be searched to see if they contain this object + * - index objects aren't stored on disk until there's a dependent file that + * needs storing + * - other objects are stored in a selected cache immediately, and all the + * indexes forming the path to it are instantiated if necessary + * - we never let on to the netfs about errors + * - we may set a negative cookie pointer, 
but that's okay + */ +struct fscache_cookie *__fscache_acquire_cookie(struct fscache_cookie *parent, + struct fscache_cookie_def *def, + void *netfs_data) +{ + struct fscache_cookie *cookie; + struct fscache_cache *cache; + struct fscache_object *object; + int ret = 0; + + BUG_ON(!def); + + _enter("{%s},{%s},%p", + parent ? (char *) parent->def->name : "", + def->name, netfs_data); + + /* if there's no parent cookie, then we don't create one here either */ + if (!parent) { + _leave(" [no parent]"); + return NULL; + } + + /* validate the definition */ + BUG_ON(!def->get_key); + BUG_ON(!def->name[0]); + + BUG_ON(def->type == FSCACHE_COOKIE_TYPE_INDEX && + parent->def->type != FSCACHE_COOKIE_TYPE_INDEX); + + /* allocate and initialise a cookie */ + cookie = kmem_cache_alloc(fscache_cookie_jar, SLAB_KERNEL); + if (!cookie) { + _leave(" [ENOMEM]"); + return NULL; + } + + atomic_set(&cookie->usage, 1); + atomic_set(&cookie->children, 0); + + atomic_inc(&parent->usage); + atomic_inc(&parent->children); + + cookie->def = def; + cookie->parent = parent; + cookie->netfs = parent->netfs; + cookie->netfs_data = netfs_data; + + /* now we need to see whether the backing objects for this cookie yet + * exist, if not there'll be nothing to search */ + down_read(&fscache_addremove_sem); + + if (list_empty(&fscache_cache_list)) { + up_read(&fscache_addremove_sem); + _leave(" = %p [no caches]", cookie); + return cookie; + } + + /* if the object is an index then we need do nothing more here - we + * create indexes on disk when we need them as an index may exist in + * multiple caches */ + if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { + down_write(&cookie->sem); + + /* the object is a file - we need to select a cache in which to + * store it */ + cache = fscache_select_cache_for_object(cookie); + if (!cache) + goto no_cache; /* couldn't decide on a cache */ + + /* create a file index entry on disc, along with all the + * indexes required to find it again later */ + object = fscache_lookup_object(cookie, cache); + if (IS_ERR(object)) { + ret = PTR_ERR(object); + goto error; + } + + up_write(&cookie->sem); + } +out: + up_read(&fscache_addremove_sem); + _leave(" = %p", cookie); + return cookie; + +no_cache: + ret = -ENOMEDIUM; + goto error_cleanup; +error: + printk(KERN_ERR "FS-Cache: error from cache: %d\n", ret); +error_cleanup: + if (cookie) { + up_write(&cookie->sem); + __fscache_cookie_put(cookie); + cookie = NULL; + atomic_dec(&parent->children); + } + + goto out; + +} /* end __fscache_acquire_cookie() */ + +EXPORT_SYMBOL(__fscache_acquire_cookie); + +/*****************************************************************************/ +/* + * release a cookie back to the cache + * - the object will be marked as recyclable on disc if retire is true + * - all dependents of this cookie must have already been unregistered + * (indexes/files/pages) + */ +void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) +{ + struct fscache_cache *cache; + struct fscache_object *object; + struct hlist_node *_p; + + if (!cookie) { + _leave(" [no cookie]"); + return; + } + + _enter("%p{%s},%d", cookie, cookie->def->name, retire); + + if (atomic_read(&cookie->children) != 0) { + printk("FS-Cache: cookie still has children\n"); + BUG(); + } + + /* detach pointers back to the netfs */ + down_write(&cookie->sem); + + cookie->netfs_data = NULL; + cookie->def = NULL; + + /* mark retired objects for recycling */ + if (retire) { + hlist_for_each_entry(object, _p, + &cookie->backing_objects, + cookie_link + ) { 
+ set_bit(FSCACHE_OBJECT_RECYCLING, &object->flags); + } + } + + /* break links with all the active objects */ + while (!hlist_empty(&cookie->backing_objects)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, + cookie_link); + + /* detach each cache object from the object cookie */ + set_bit(FSCACHE_OBJECT_RELEASING, &object->flags); + + hlist_del_init(&object->cookie_link); + + cache = object->cache; + cache->ops->lock_object(object); + object->cookie = NULL; + cache->ops->unlock_object(object); + + if (atomic_dec_and_test(&cookie->usage)) + /* the cookie refcount shouldn't be reduced to 0 yet */ + BUG(); + + spin_lock(&cache->object_list_lock); + list_del_init(&object->cache_link); + spin_unlock(&cache->object_list_lock); + + cache->ops->put_object(object); + } + + up_write(&cookie->sem); + + if (cookie->parent) { +#ifdef CONFIG_DEBUG_SLAB + BUG_ON((atomic_read(&cookie->parent->children) & 0xffff0000) == 0x6b6b0000); +#endif + atomic_dec(&cookie->parent->children); + } + + /* finally dispose of the cookie */ + fscache_cookie_put(cookie); + + _leave(""); + +} /* end __fscache_relinquish_cookie() */ + +EXPORT_SYMBOL(__fscache_relinquish_cookie); + +/*****************************************************************************/ +/* + * update the index entries backing a cookie + */ +void __fscache_update_cookie(struct fscache_cookie *cookie) +{ + struct fscache_object *object; + struct hlist_node *_p; + + if (!cookie) { + _leave(" [no cookie]"); + return; + } + + _enter("{%s}", cookie->def->name); + + BUG_ON(!cookie->def->get_aux); + + down_write(&cookie->sem); + down_read(&cookie->parent->sem); + + /* update the index entry on disc in each cache backing this cookie */ + hlist_for_each_entry(object, _p, + &cookie->backing_objects, cookie_link + ) { + if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) + object->cache->ops->update_object(object); + } + + up_read(&cookie->parent->sem); + up_write(&cookie->sem); + _leave(""); + +} /* end __fscache_update_cookie() */ + +EXPORT_SYMBOL(__fscache_update_cookie); + +/*****************************************************************************/ +/* + * destroy a cookie + */ +static void __fscache_cookie_put(struct fscache_cookie *cookie) +{ + struct fscache_cookie *parent; + + _enter("%p", cookie); + + for (;;) { + parent = cookie->parent; + BUG_ON(!hlist_empty(&cookie->backing_objects)); + kmem_cache_free(fscache_cookie_jar, cookie); + + if (!parent) + break; + + cookie = parent; + BUG_ON(atomic_read(&cookie->usage) <= 0); + if (!atomic_dec_and_test(&cookie->usage)) + break; + } + + _leave(""); + +} /* end __fscache_cookie_put() */ + +/*****************************************************************************/ +/* + * initialise an cookie jar slab element prior to any use + */ +void fscache_cookie_init_once(void *_cookie, kmem_cache_t *cachep, + unsigned long flags) +{ + struct fscache_cookie *cookie = _cookie; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) { + memset(cookie, 0, sizeof(*cookie)); + init_rwsem(&cookie->sem); + INIT_HLIST_HEAD(&cookie->backing_objects); + } + +} /* end fscache_cookie_init_once() */ + +/*****************************************************************************/ +/* + * pin an object into the cache + */ +int __fscache_pin_cookie(struct fscache_cookie *cookie) +{ + struct fscache_object *object; + int ret; + + _enter("%p", cookie); + + if (hlist_empty(&cookie->backing_objects)) { + _leave(" = -ENOBUFS"); + return -ENOBUFS; + } + + /* not 
supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + /* prevent the file from being uncached whilst we access it and exclude + * read and write attempts on pages + */ + down_write(&cookie->sem); + + ret = -ENOBUFS; + if (!hlist_empty(&cookie->backing_objects)) { + /* get and pin the backing object */ + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto out; + + if (!object->cache->ops->pin_object) { + ret = -EOPNOTSUPP; + goto out; + } + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + if (object->cache->ops->grab_object(object)) { + /* ask the cache to honour the operation */ + ret = object->cache->ops->pin_object(object); + + object->cache->ops->put_object(object); + } + + fscache_operation_unlock(object); + } + } + +out: + up_write(&cookie->sem); + _leave(" = %d", ret); + return ret; + +} /* end __fscache_pin_cookie() */ + +EXPORT_SYMBOL(__fscache_pin_cookie); + +/*****************************************************************************/ +/* + * unpin an object into the cache + */ +void __fscache_unpin_cookie(struct fscache_cookie *cookie) +{ + struct fscache_object *object; + int ret; + + _enter("%p", cookie); + + if (hlist_empty(&cookie->backing_objects)) { + _leave(" [no obj]"); + return; + } + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + /* prevent the file from being uncached whilst we access it and exclude + * read and write attempts on pages + */ + down_write(&cookie->sem); + + ret = -ENOBUFS; + if (!hlist_empty(&cookie->backing_objects)) { + /* get and unpin the backing object */ + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto out; + + if (!object->cache->ops->unpin_object) + goto out; + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + if (object->cache->ops->grab_object(object)) { + /* ask the cache to honour the operation */ + object->cache->ops->unpin_object(object); + + object->cache->ops->put_object(object); + } + + fscache_operation_unlock(object); + } + } + +out: + up_write(&cookie->sem); + _leave(""); + +} /* end __fscache_unpin_cookie() */ + +EXPORT_SYMBOL(__fscache_unpin_cookie); diff --git a/fs/fscache/fscache-int.h b/fs/fscache/fscache-int.h new file mode 100644 index 0000000..d075660 --- /dev/null +++ b/fs/fscache/fscache-int.h @@ -0,0 +1,93 @@ +/* fscache-int.h: internal definitions + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _FSCACHE_INT_H +#define _FSCACHE_INT_H + +#include +#include +#include + +extern kmem_cache_t *fscache_cookie_jar; + +extern struct fscache_cookie fscache_fsdef_index; +extern struct fscache_cookie_def fscache_fsdef_netfs_def; + +extern void fscache_cookie_init_once(void *_cookie, kmem_cache_t *cachep, unsigned long flags); + +/* + * prevent the cache from being withdrawn whilst an operation is in progress + * - returns false if the cache is being withdrawn already or if the cache is + * waiting to withdraw itself + * - returns true if the cache was not being withdrawn + * - fscache_withdraw_cache() will wait using down_write() until all ops are + * complete + */ +static inline int fscache_operation_lock(struct fscache_object *object) +{ + return down_read_trylock(&object->cache->withdrawal_sem); +} + +/* + * release the operation lock + */ +static inline void fscache_operation_unlock(struct fscache_object *object) +{ + up_read(&object->cache->withdrawal_sem); +} + + +/*****************************************************************************/ +/* + * debug tracing + */ +#define dbgprintk(FMT,...) \ + printk("[%-6.6s] "FMT"\n",current->comm ,##__VA_ARGS__) +#define _dbprintk(FMT,...) do { } while(0) + +#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) +#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) +#define kdebug(FMT,...) dbgprintk(FMT ,##__VA_ARGS__) + +#define kjournal(FMT,...) _dbprintk(FMT ,##__VA_ARGS__) + +#define dbgfree(ADDR) _dbprintk("%p:%d: FREEING %p",__FILE__,__LINE__,ADDR) + +#define dbgpgalloc(PAGE) \ +do { \ + _dbprintk("PGALLOC %s:%d: %p {%lx,%lu}\n", \ + __FILE__,__LINE__, \ + (PAGE),(PAGE)->mapping->host->i_ino,(PAGE)->index \ + ); \ +} while(0) + +#define dbgpgfree(PAGE) \ +do { \ + if ((PAGE)) \ + _dbprintk("PGFREE %s:%d: %p {%lx,%lu}\n", \ + __FILE__,__LINE__, \ + (PAGE), \ + (PAGE)->mapping->host->i_ino, \ + (PAGE)->index \ + ); \ +} while(0) + +#ifdef __KDEBUG +#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__) +#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__) +#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__) +#else +#define _enter(FMT,...) do { } while(0) +#define _leave(FMT,...) do { } while(0) +#define _debug(FMT,...) do { } while(0) +#endif + +#endif /* _FSCACHE_INT_H */ diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c new file mode 100644 index 0000000..75a0d45 --- /dev/null +++ b/fs/fscache/fsdef.c @@ -0,0 +1,113 @@ +/* fsdef.c: filesystem index definition + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include "fscache-int.h" + +static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax); + +static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax); + +static fscache_checkaux_t fscache_fsdef_netfs_check_aux(void *cookie_netfs_data, + const void *data, + uint16_t datalen); + +struct fscache_cookie_def fscache_fsdef_netfs_def = { + .name = "FSDEF.netfs", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = fscache_fsdef_netfs_get_key, + .get_aux = fscache_fsdef_netfs_get_aux, + .check_aux = fscache_fsdef_netfs_check_aux, +}; + +struct fscache_cookie fscache_fsdef_index = { + .usage = ATOMIC_INIT(1), + .def = NULL, + .sem = __RWSEM_INITIALIZER(fscache_fsdef_index.sem), + .backing_objects = HLIST_HEAD_INIT, +}; + +EXPORT_SYMBOL(fscache_fsdef_index); + +/*****************************************************************************/ +/* + * get the key data for an FSDEF index record + */ +static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct fscache_netfs *netfs = cookie_netfs_data; + unsigned klen; + + _enter("{%s.%u},", netfs->name, netfs->version); + + klen = strlen(netfs->name); + if (klen > bufmax) + return 0; + + memcpy(buffer, netfs->name, klen); + return klen; + +} /* end fscache_fsdef_netfs_get_key() */ + +/*****************************************************************************/ +/* + * get the auxilliary data for an FSDEF index record + */ +static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct fscache_netfs *netfs = cookie_netfs_data; + unsigned dlen; + + _enter("{%s.%u},", netfs->name, netfs->version); + + dlen = sizeof(uint32_t); + if (dlen > bufmax) + return 0; + + memcpy(buffer, &netfs->version, dlen); + return dlen; + +} /* end fscache_fsdef_netfs_get_aux() */ + +/*****************************************************************************/ +/* + * check that the version stored in the auxilliary data is correct + */ +static fscache_checkaux_t fscache_fsdef_netfs_check_aux(void *cookie_netfs_data, + const void *data, + uint16_t datalen) +{ + struct fscache_netfs *netfs = cookie_netfs_data; + uint32_t version; + + _enter("{%s},,%hu", netfs->name, datalen); + + if (datalen != sizeof(version)) { + _leave(" = OBSOLETE [dl=%d v=%d]", + datalen, sizeof(version)); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + memcpy(&version, data, sizeof(version)); + if (version != netfs->version) { + _leave(" = OBSOLETE [ver=%x net=%x]", + version, netfs->version); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + _leave(" = OKAY"); + return FSCACHE_CHECKAUX_OKAY; + +} /* end fscache_fsdef_netfs_check_aux() */ diff --git a/fs/fscache/main.c b/fs/fscache/main.c new file mode 100644 index 0000000..4697917 --- /dev/null +++ b/fs/fscache/main.c @@ -0,0 +1,105 @@ +/* main.c: general filesystem caching manager + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include "fscache-int.h" + +int fscache_debug; + +static int fscache_init(void); +static void fscache_exit(void); + +fs_initcall(fscache_init); +module_exit(fscache_exit); + +MODULE_DESCRIPTION("FS Cache Manager"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +static void fscache_ktype_release(struct kobject *kobject); + +static struct sysfs_ops fscache_sysfs_ops = { + .show = NULL, + .store = NULL, +}; + +static struct kobj_type fscache_ktype = { + .release = fscache_ktype_release, + .sysfs_ops = &fscache_sysfs_ops, + .default_attrs = NULL, +}; + +struct kset fscache_kset = { + .kobj.name = "fscache", + .kobj.kset = &fs_subsys.kset, + .ktype = &fscache_ktype, +}; + +EXPORT_SYMBOL(fscache_kset); + +/*****************************************************************************/ +/* + * initialise the fs caching module + */ +static int __init fscache_init(void) +{ + int ret; + + fscache_cookie_jar = + kmem_cache_create("fscache_cookie_jar", + sizeof(struct fscache_cookie), + 0, + 0, + fscache_cookie_init_once, + NULL); + + if (!fscache_cookie_jar) { + printk(KERN_NOTICE + "FS-Cache: Failed to allocate a cookie jar\n"); + return -ENOMEM; + } + + ret = kset_register(&fscache_kset); + if (ret < 0) { + kmem_cache_destroy(fscache_cookie_jar); + return ret; + } + + printk(KERN_NOTICE "FS-Cache: Loaded\n"); + return 0; + +} /* end fscache_init() */ + +/*****************************************************************************/ +/* + * clean up on module removal + */ +static void __exit fscache_exit(void) +{ + _enter(""); + + kset_unregister(&fscache_kset); + kmem_cache_destroy(fscache_cookie_jar); + printk(KERN_NOTICE "FS-Cache: unloaded\n"); + +} /* end fscache_exit() */ + +/*****************************************************************************/ +/* + * release the ktype + */ +static void fscache_ktype_release(struct kobject *kobject) +{ +} /* end fscache_ktype_release() */ diff --git a/fs/fscache/page.c b/fs/fscache/page.c new file mode 100644 index 0000000..8d8ade5 --- /dev/null +++ b/fs/fscache/page.c @@ -0,0 +1,548 @@ +/* page.c: general filesystem cache cookie management + * + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include "fscache-int.h" + +/*****************************************************************************/ +/* + * set the data file size on an object in the cache + */ +int __fscache_set_i_size(struct fscache_cookie *cookie, loff_t i_size) +{ + struct fscache_object *object; + int ret; + + _enter("%p,%llu,", cookie, i_size); + + if (hlist_empty(&cookie->backing_objects)) { + _leave(" = -ENOBUFS"); + return -ENOBUFS; + } + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + /* prevent the file from being uncached whilst we access it and exclude + * read and write attempts on pages + */ + down_write(&cookie->sem); + + ret = -ENOBUFS; + if (!hlist_empty(&cookie->backing_objects)) { + /* get and pin the backing object */ + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto out; + + /* prevent the cache from being withdrawn */ + if (object->cache->ops->set_i_size && + fscache_operation_lock(object) + ) { + if (object->cache->ops->grab_object(object)) { + /* ask the cache to honour the operation */ + ret = object->cache->ops->set_i_size(object, + i_size); + + object->cache->ops->put_object(object); + } + + fscache_operation_unlock(object); + } + } + +out: + up_write(&cookie->sem); + _leave(" = %d", ret); + return ret; + +} /* end __fscache_set_i_size() */ + +EXPORT_SYMBOL(__fscache_set_i_size); + +/*****************************************************************************/ +/* + * reserve space for an object + */ +int __fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) +{ + struct fscache_object *object; + int ret; + + _enter("%p,%llu,", cookie, size); + + if (hlist_empty(&cookie->backing_objects)) { + _leave(" = -ENOBUFS"); + return -ENOBUFS; + } + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + /* prevent the file from being uncached whilst we access it and exclude + * read and write attempts on pages + */ + down_write(&cookie->sem); + + ret = -ENOBUFS; + if (!hlist_empty(&cookie->backing_objects)) { + /* get and pin the backing object */ + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto out; + + if (!object->cache->ops->reserve_space) { + ret = -EOPNOTSUPP; + goto out; + } + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + if (object->cache->ops->grab_object(object)) { + /* ask the cache to honour the operation */ + ret = object->cache->ops->reserve_space(object, + size); + + object->cache->ops->put_object(object); + } + + fscache_operation_unlock(object); + } + } + +out: + up_write(&cookie->sem); + _leave(" = %d", ret); + return ret; + +} /* end __fscache_reserve_space() */ + +EXPORT_SYMBOL(__fscache_reserve_space); + +/*****************************************************************************/ +/* + * read a page from the cache or allocate a block in which to store it + * - we return: + * -ENOMEM - out of memory, nothing done + * -EINTR - interrupted + * -ENOBUFS - no backing object available in which to cache the block + * -ENODATA - no data available in the backing object for this block + * 0 - dispatched a read - it'll call end_io_func() when finished + */ +int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, + struct page *page, + 
				 fscache_rw_complete_t end_io_func,
+				 void *context,
+				 gfp_t gfp)
+{
+	struct fscache_object *object;
+	int ret;
+
+	_enter("%p,{%lu},", cookie, page->index);
+
+	if (hlist_empty(&cookie->backing_objects)) {
+		_leave(" -ENOBUFS [no backing objects]");
+		return -ENOBUFS;
+	}
+
+	/* not supposed to use this for indexes */
+	BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX);
+
+	/* prevent the file from being uncached whilst we access it */
+	down_read(&cookie->sem);
+
+	ret = -ENOBUFS;
+	if (!hlist_empty(&cookie->backing_objects)) {
+		/* get and pin the backing object */
+		object = hlist_entry(cookie->backing_objects.first,
+				     struct fscache_object, cookie_link);
+
+		if (test_bit(FSCACHE_IOERROR, &object->cache->flags))
+			goto out;
+
+		/* prevent the cache from being withdrawn */
+		if (fscache_operation_lock(object)) {
+			if (object->cache->ops->grab_object(object)) {
+				/* ask the cache to honour the operation */
+				ret = object->cache->ops->read_or_alloc_page(
+					object,
+					page,
+					end_io_func,
+					context,
+					gfp);
+
+				object->cache->ops->put_object(object);
+			}
+
+			fscache_operation_unlock(object);
+		}
+	}
+
+out:
+	up_read(&cookie->sem);
+	_leave(" = %d", ret);
+	return ret;
+
+} /* end __fscache_read_or_alloc_page() */
+
+EXPORT_SYMBOL(__fscache_read_or_alloc_page);
+
+/*****************************************************************************/
+/*
+ * read a list of pages from the cache or allocate blocks in which to store
+ * them
+ * - we return:
+ *   -ENOMEM	- out of memory, some pages may be being read
+ *   -EINTR	- interrupted, some pages may be being read
+ *   -ENOBUFS	- no backing object or space available in which to cache any
+ *		  pages not being read
+ *   -ENODATA	- no data available in the backing object for some or all of
+ *		  the pages
+ *   0		- dispatched a read on all pages
+ *
+ * end_io_func() will be called for each page read from the cache as it
+ * finishes being read
+ *
+ * any pages for which a read is dispatched will be removed from pages and
+ * nr_pages
+ */
+int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
+				  struct address_space *mapping,
+				  struct list_head *pages,
+				  unsigned *nr_pages,
+				  fscache_rw_complete_t end_io_func,
+				  void *context,
+				  gfp_t gfp)
+{
+	struct fscache_object *object;
+	int ret;
+
+	_enter("%p,,%d,,,", cookie, *nr_pages);
+
+	if (hlist_empty(&cookie->backing_objects)) {
+		_leave(" -ENOBUFS [no backing objects]");
+		return -ENOBUFS;
+	}
+
+	/* not supposed to use this for indexes */
+	BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX);
+	BUG_ON(list_empty(pages));
+	BUG_ON(*nr_pages <= 0);
+
+	/* prevent the file from being uncached whilst we access it */
+	down_read(&cookie->sem);
+
+	ret = -ENOBUFS;
+	if (!hlist_empty(&cookie->backing_objects)) {
+		/* get and pin the backing object */
+		object = hlist_entry(cookie->backing_objects.first,
+				     struct fscache_object, cookie_link);
+
+		if (test_bit(FSCACHE_IOERROR, &object->cache->flags))
+			goto out;
+
+		/* prevent the cache from being withdrawn */
+		if (fscache_operation_lock(object)) {
+			if (object->cache->ops->grab_object(object)) {
+				/* ask the cache to honour the operation */
+				ret = object->cache->ops->read_or_alloc_pages(
+					object,
+					mapping,
+					pages,
+					nr_pages,
+					end_io_func,
+					context,
+					gfp);
+
+				object->cache->ops->put_object(object);
+			}
+
+			fscache_operation_unlock(object);
+		}
+	}
+
+out:
+	up_read(&cookie->sem);
+	_leave(" = %d", ret);
+	return ret;
+
+} /* end __fscache_read_or_alloc_pages() */
+
+EXPORT_SYMBOL(__fscache_read_or_alloc_pages);
+
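For illustration, a minimal sketch of how a network filesystem's readpage() might drive the read path above, assuming the usual arrangement in which include/linux/fscache.h wraps __fscache_read_or_alloc_page() as fscache_read_or_alloc_page() and the completion handler follows the fscache_rw_complete_t shape of (page, context, error).  The my_*() names are hypothetical:

	#include <linux/fscache.h>
	#include <linux/pagemap.h>

	/* hypothetical completion handler; assumes the fscache_rw_complete_t
	 * callback receives the page, the caller's context and an error code */
	static void my_read_done(struct page *page, void *context, int error)
	{
		if (error)
			SetPageError(page);
		else
			SetPageUptodate(page);
		unlock_page(page);
	}

	/* hypothetical address_space readpage() implementation */
	static int my_readpage(struct file *file, struct page *page)
	{
		struct fscache_cookie *cookie = my_get_cookie(page->mapping->host);
		int ret;

		/* 0 means a read was dispatched; my_read_done() runs on completion */
		ret = fscache_read_or_alloc_page(cookie, page, my_read_done,
						 NULL, GFP_KERNEL);
		if (ret == 0)
			return 0;

		/* -ENODATA: a block was allocated but holds no data yet;
		 * -ENOBUFS/-ENOMEM/-EINTR: the cache could not be used;
		 * either way, fall back to fetching the page from the server */
		return my_read_from_server(file, page);
	}

On the fallback path the netfs would typically store the data it fetched from the server back into the cache afterwards, using the write-side calls defined later in this file.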
+/*****************************************************************************/ +/* + * allocate a block in the cache on which to store a page + * - we return: + * -ENOMEM - out of memory, nothing done + * -EINTR - interrupted + * -ENOBUFS - no backing object available in which to cache the block + * 0 - block allocated + */ +int __fscache_alloc_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + struct fscache_object *object; + int ret; + + _enter("%p,{%lu},", cookie, page->index); + + if (hlist_empty(&cookie->backing_objects)) { + _leave(" -ENOBUFS [no backing objects]"); + return -ENOBUFS; + } + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + /* prevent the file from being uncached whilst we access it */ + down_read(&cookie->sem); + + ret = -ENOBUFS; + if (!hlist_empty(&cookie->backing_objects)) { + /* get and pin the backing object */ + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto out; + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + if (object->cache->ops->grab_object(object)) { + /* ask the cache to honour the operation */ + ret = object->cache->ops->allocate_page(object, + page, + gfp); + + object->cache->ops->put_object(object); + } + + fscache_operation_unlock(object); + } + } + +out: + up_read(&cookie->sem); + _leave(" = %d", ret); + return ret; + +} /* end __fscache_alloc_page() */ + +EXPORT_SYMBOL(__fscache_alloc_page); + +/*****************************************************************************/ +/* + * request a page be stored in the cache + * - returns: + * -ENOMEM - out of memory, nothing done + * -EINTR - interrupted + * -ENOBUFS - no backing object available in which to cache the page + * 0 - dispatched a write - it'll call end_io_func() when finished + */ +int __fscache_write_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ + struct fscache_object *object; + int ret; + + _enter("%p,{%lu},", cookie, page->index); + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + /* prevent the file from been uncached whilst we deal with it */ + down_read(&cookie->sem); + + ret = -ENOBUFS; + if (!hlist_empty(&cookie->backing_objects)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto out; + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + /* ask the cache to honour the operation */ + ret = object->cache->ops->write_page(object, + page, + end_io_func, + context, + gfp); + fscache_operation_unlock(object); + } + } + +out: + up_read(&cookie->sem); + _leave(" = %d", ret); + return ret; + +} /* end __fscache_write_page() */ + +EXPORT_SYMBOL(__fscache_write_page); + +/*****************************************************************************/ +/* + * request several pages be stored in the cache + * - returns: + * -ENOMEM - out of memory, nothing done + * -EINTR - interrupted + * -ENOBUFS - no backing object available in which to cache the page + * 0 - dispatched a write - it'll call end_io_func() when finished + */ +int __fscache_write_pages(struct fscache_cookie *cookie, + struct pagevec *pagevec, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ + 
struct fscache_object *object; + int ret; + + _enter("%p,{%ld},", cookie, pagevec->nr); + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + /* prevent the file from been uncached whilst we deal with it */ + down_read(&cookie->sem); + + ret = -ENOBUFS; + if (!hlist_empty(&cookie->backing_objects)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto out; + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + /* ask the cache to honour the operation */ + ret = object->cache->ops->write_pages(object, + pagevec, + end_io_func, + context, + gfp); + fscache_operation_unlock(object); + } + } + +out: + up_read(&cookie->sem); + _leave(" = %d", ret); + return ret; + +} /* end __fscache_write_pages() */ + +EXPORT_SYMBOL(__fscache_write_pages); + +/*****************************************************************************/ +/* + * remove a page from the cache + */ +void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page) +{ + struct fscache_object *object; + struct pagevec pagevec; + + _enter(",{%lu}", page->index); + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + if (hlist_empty(&cookie->backing_objects)) { + _leave(" [no backing]"); + return; + } + + pagevec_init(&pagevec, 0); + pagevec_add(&pagevec, page); + + /* ask the cache to honour the operation */ + down_read(&cookie->sem); + + if (!hlist_empty(&cookie->backing_objects)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + object->cache->ops->uncache_pages(object, &pagevec); + fscache_operation_unlock(object); + } + } + + up_read(&cookie->sem); + + _leave(""); + return; + +} /* end __fscache_uncache_page() */ + +EXPORT_SYMBOL(__fscache_uncache_page); + +/*****************************************************************************/ +/* + * remove a bunch of pages from the cache + */ +void __fscache_uncache_pages(struct fscache_cookie *cookie, + struct pagevec *pagevec) +{ + struct fscache_object *object; + + _enter(",{%ld}", pagevec->nr); + + BUG_ON(pagevec->nr <= 0); + BUG_ON(!pagevec->pages[0]); + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + if (hlist_empty(&cookie->backing_objects)) { + _leave(" [no backing]"); + return; + } + + /* ask the cache to honour the operation */ + down_read(&cookie->sem); + + if (!hlist_empty(&cookie->backing_objects)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + object->cache->ops->uncache_pages(object, pagevec); + fscache_operation_unlock(object); + } + } + + up_read(&cookie->sem); + + _leave(""); + return; + +} /* end __fscache_uncache_pages() */ + +EXPORT_SYMBOL(__fscache_uncache_pages); diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 0b572a0..2af6f22 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -4,9 +4,9 @@ # obj-$(CONFIG_NFS_FS) += nfs.o -nfs-y := dir.o file.o inode.o super.o nfs2xdr.o pagelist.o \ - proc.o read.o symlink.o unlink.o write.o \ - namespace.o +nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ + pagelist.o proc.o read.o symlink.o unlink.o \ + write.o 
namespace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o @@ -16,4 +16,5 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4x nfs4namespace.o nfs-$(CONFIG_NFS_DIRECTIO) += direct.o nfs-$(CONFIG_SYSCTL) += sysctl.o +nfs-$(CONFIG_NFS_FSCACHE) += fscache.o nfs-objs := $(nfs-y) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index fe0a6b8..a3ee113 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -19,6 +19,7 @@ #include #include "nfs4_fs.h" #include "callback.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -36,6 +37,21 @@ static struct svc_program nfs4_callback_ unsigned int nfs_callback_set_tcpport; unsigned short nfs_callback_tcpport; +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; + +static int param_set_port(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) + return -EINVAL; + *((int *)kp->arg) = num; + return 0; +} + +module_param_call(callback_tcpport, param_set_port, param_get_int, + &nfs_callback_set_tcpport, 0644); /* * This is the callback kernel thread. @@ -134,10 +150,8 @@ out_err: /* * Kill the server process if it is not already up. */ -int nfs_callback_down(void) +void nfs_callback_down(void) { - int ret = 0; - lock_kernel(); mutex_lock(&nfs_callback_mutex); nfs_callback_info.users--; @@ -149,20 +163,19 @@ int nfs_callback_down(void) } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0); mutex_unlock(&nfs_callback_mutex); unlock_kernel(); - return ret; } static int nfs_callback_authenticate(struct svc_rqst *rqstp) { - struct in_addr *addr = &rqstp->rq_addr.sin_addr; - struct nfs4_client *clp; + struct sockaddr_in *addr = &rqstp->rq_addr; + struct nfs_client *clp; /* Don't talk to strangers */ - clp = nfs4_find_client(addr); + clp = nfs_find_client(addr, 4); if (clp == NULL) return SVC_DROP; - dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr)); - nfs4_put_client(clp); + dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->sin_addr)); + nfs_put_client(clp); switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: if (rqstp->rq_proc != CB_NULL) diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index b252e7f..5676163 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -62,8 +62,13 @@ struct cb_recallargs { extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy); +#ifdef CONFIG_NFS_V4 extern int nfs_callback_up(void); -extern int nfs_callback_down(void); +extern void nfs_callback_down(void); +#else +#define nfs_callback_up() (0) +#define nfs_callback_down() do {} while(0) +#endif extern unsigned int nfs_callback_set_tcpport; extern unsigned short nfs_callback_tcpport; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 7719483..97cf8f7 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -10,19 +10,20 @@ #include #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) { - struct nfs4_client *clp; + struct nfs_client *clp; struct nfs_delegation *delegation; struct nfs_inode *nfsi; struct inode *inode; res->bitmap[0] = res->bitmap[1] = 0; 
res->status = htonl(NFS4ERR_BADHANDLE); - clp = nfs4_find_client(&args->addr->sin_addr); + clp = nfs_find_client(args->addr, 4); if (clp == NULL) goto out; inode = nfs_delegation_find_inode(clp, &args->fh); @@ -48,7 +49,7 @@ out_iput: up_read(&nfsi->rwsem); iput(inode); out_putclient: - nfs4_put_client(clp); + nfs_put_client(clp); out: dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status)); return res->status; @@ -56,12 +57,12 @@ out: unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy) { - struct nfs4_client *clp; + struct nfs_client *clp; struct inode *inode; unsigned res; res = htonl(NFS4ERR_BADHANDLE); - clp = nfs4_find_client(&args->addr->sin_addr); + clp = nfs_find_client(args->addr, 4); if (clp == NULL) goto out; inode = nfs_delegation_find_inode(clp, &args->fh); @@ -80,7 +81,7 @@ unsigned nfs4_callback_recall(struct cb_ } iput(inode); out_putclient: - nfs4_put_client(clp); + nfs_put_client(clp); out: dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res)); return res; diff --git a/fs/nfs/client.c b/fs/nfs/client.c new file mode 100644 index 0000000..bf701f1 --- /dev/null +++ b/fs/nfs/client.c @@ -0,0 +1,1439 @@ +/* client.c: NFS client sharing and management code + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "nfs4_fs.h" +#include "callback.h" +#include "delegation.h" +#include "iostat.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT + +static DEFINE_SPINLOCK(nfs_client_lock); +static LIST_HEAD(nfs_client_list); +static LIST_HEAD(nfs_volume_list); +static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); + +/* + * RPC cruft for NFS + */ +static struct rpc_version *nfs_version[5] = { + [2] = &nfs_version2, +#ifdef CONFIG_NFS_V3 + [3] = &nfs_version3, +#endif +#ifdef CONFIG_NFS_V4 + [4] = &nfs_version4, +#endif +}; + +struct rpc_program nfs_program = { + .name = "nfs", + .number = NFS_PROGRAM, + .nrvers = ARRAY_SIZE(nfs_version), + .version = nfs_version, + .stats = &nfs_rpcstat, + .pipe_dir_name = "/nfs", +}; + +struct rpc_stat nfs_rpcstat = { + .program = &nfs_program +}; + + +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static struct rpc_version * nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = ARRAY_SIZE(nfsacl_version), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; +#endif /* CONFIG_NFS_V3_ACL */ + +/* + * Allocate a shared client record + * + * Since these are allocated/deallocated very rarely, we don't + * bother putting them in a slab cache... + */ +static struct nfs_client *nfs_alloc_client(const char *hostname, + const struct sockaddr_in *addr, + int nfsversion) +{ + struct nfs_client *clp; + int error; + + if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) + goto error_0; + + error = rpciod_up(); + if (error < 0) { + dprintk("%s: couldn't start rpciod! 
Error = %d\n", + __FUNCTION__, error); + __set_bit(NFS_CS_RPCIOD, &clp->cl_svcs_state); + goto error_1; + } + + if (nfsversion == 4) { + if (nfs_callback_up() < 0) + goto error_2; + __set_bit(NFS_CS_CALLBACK, &clp->cl_svcs_state); + } + + atomic_set(&clp->cl_count, 1); + clp->cl_cons_state = NFS_CS_INITING; + + clp->cl_nfsversion = nfsversion; + memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); + + if (hostname) { + clp->cl_hostname = kstrdup(hostname, GFP_KERNEL); + if (!clp->cl_hostname) + goto error_3; + } + + INIT_LIST_HEAD(&clp->cl_superblocks); + clp->cl_rpcclient = ERR_PTR(-EINVAL); + +#ifdef CONFIG_NFS_V4 + init_rwsem(&clp->cl_sem); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_state_owners); + INIT_LIST_HEAD(&clp->cl_unused); + spin_lock_init(&clp->cl_lock); + INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); + rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); + clp->cl_boot_time = CURRENT_TIME; + clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; +#endif + + nfs_fscache_get_client_cookie(clp); + + return clp; + +error_3: + nfs_callback_down(); + __clear_bit(NFS_CS_CALLBACK, &clp->cl_svcs_state); +error_2: + rpciod_down(); + __clear_bit(NFS_CS_RPCIOD, &clp->cl_svcs_state); +error_1: + kfree(clp); +error_0: + return NULL; +} + +/* + * Destroy a shared client record + */ +static void nfs_free_client(struct nfs_client *clp) +{ + dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion); + +#ifdef CONFIG_NFS_V4 + if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_svcs_state)) { + while (!list_empty(&clp->cl_unused)) { + struct nfs4_state_owner *sp; + + sp = list_entry(clp->cl_unused.next, + struct nfs4_state_owner, + so_list); + list_del(&sp->so_list); + kfree(sp); + } + BUG_ON(!list_empty(&clp->cl_state_owners)); + nfs_idmap_delete(clp); + } +#endif + + nfs_fscache_release_client_cookie(clp); + + /* -EIO all pending I/O */ + if (!IS_ERR(clp->cl_rpcclient)) + rpc_shutdown_client(clp->cl_rpcclient); + + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_svcs_state)) + nfs_callback_down(); + + if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_svcs_state)) + rpciod_down(); + + kfree(clp->cl_hostname); + kfree(clp); + + dprintk("<-- nfs_free_client()\n"); +} + +/* + * Release a reference to a shared client record + */ +void nfs_put_client(struct nfs_client *clp) +{ + dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); + + if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { + list_del(&clp->cl_share_link); + spin_unlock(&nfs_client_lock); + + BUG_ON(!list_empty(&clp->cl_superblocks)); + + nfs_free_client(clp); + } +} + +/* + * Find a client by address + * - caller must hold nfs_client_lock + */ +static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion) +{ + struct nfs_client *clp; + + list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + /* Different NFS versions cannot share the same nfs_client */ + if (clp->cl_nfsversion != nfsversion) + continue; + + if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr, + sizeof(clp->cl_addr.sin_addr)) != 0) + continue; + + if (clp->cl_addr.sin_port == addr->sin_port) + goto found; + } + + return NULL; + +found: + atomic_inc(&clp->cl_count); + return clp; +} + +/* + * Find a client by IP address and protocol version + * - returns NULL if no such client + */ +struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion) +{ + struct nfs_client *clp; + + spin_lock(&nfs_client_lock); + clp = __nfs_find_client(addr, nfsversion); + 
spin_unlock(&nfs_client_lock); + + BUG_ON(clp->cl_cons_state == 0); + + return clp; +} + +/* + * Look up a client by IP address and protocol version + * - creates a new record if one doesn't yet exist + */ +static struct nfs_client *nfs_get_client(const char *hostname, + const struct sockaddr_in *addr, + int nfsversion) +{ + struct nfs_client *clp, *new = NULL; + int error; + + dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n", + hostname ?: "", NIPQUAD(addr->sin_addr), + addr->sin_port, nfsversion); + + /* see if the client already exists */ + do { + spin_lock(&nfs_client_lock); + + clp = __nfs_find_client(addr, nfsversion); + if (clp) + goto found_client; + if (new) + goto install_client; + + spin_unlock(&nfs_client_lock); + + new = nfs_alloc_client(hostname, addr, nfsversion); + } while (new); + + return ERR_PTR(-ENOMEM); + + /* install a new client and return with it unready */ +install_client: + clp = new; + list_add(&clp->cl_share_link, &nfs_client_list); + spin_unlock(&nfs_client_lock); + dprintk("--> nfs_get_client() = %p [new]\n", clp); + return clp; + + /* found an existing client + * - make sure it's ready before returning + */ +found_client: + spin_unlock(&nfs_client_lock); + + if (new) + nfs_free_client(new); + + if (clp->cl_cons_state == NFS_CS_INITING) { + DECLARE_WAITQUEUE(myself, current); + + add_wait_queue(&nfs_client_active_wq, &myself); + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current) || + clp->cl_cons_state > NFS_CS_READY) + break; + schedule(); + } + + remove_wait_queue(&nfs_client_active_wq, &myself); + + if (signal_pending(current)) { + nfs_put_client(clp); + return ERR_PTR(-ERESTARTSYS); + } + } + + if (clp->cl_cons_state < NFS_CS_READY) { + error = clp->cl_cons_state; + nfs_put_client(clp); + return ERR_PTR(error); + } + + BUG_ON(clp->cl_cons_state != NFS_CS_READY); + + dprintk("--> nfs_get_client() = %p [share]\n", clp); + return clp; +} + +/* + * Mark a server as ready or failed + */ +static void nfs_mark_client_ready(struct nfs_client *clp, int state) +{ + clp->cl_cons_state = state; + wake_up_all(&nfs_client_active_wq); +} + +/* + * Initialise the timeout values for a connection + */ +static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, + unsigned int timeo, unsigned int retrans) +{ + to->to_initval = timeo * HZ / 10; + to->to_retries = retrans; + if (!to->to_retries) + to->to_retries = 2; + + switch (proto) { + case IPPROTO_TCP: + if (!to->to_initval) + to->to_initval = 60 * HZ; + if (to->to_initval > NFS_MAX_TCP_TIMEOUT) + to->to_initval = NFS_MAX_TCP_TIMEOUT; + to->to_increment = to->to_initval; + to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); + to->to_exponential = 0; + break; + case IPPROTO_UDP: + default: + if (!to->to_initval) + to->to_initval = 11 * HZ / 10; + if (to->to_initval > NFS_MAX_UDP_TIMEOUT) + to->to_initval = NFS_MAX_UDP_TIMEOUT; + to->to_maxval = NFS_MAX_UDP_TIMEOUT; + to->to_exponential = 1; + break; + } +} + +/* + * Create an RPC client handle + */ +static int nfs_create_rpc_client(struct nfs_client *clp, int proto, + unsigned int timeo, + unsigned int retrans, + rpc_authflavor_t flavor) +{ + struct rpc_timeout timeparms; + struct rpc_xprt *xprt = NULL; + struct rpc_clnt *clnt = NULL; + + if (!IS_ERR(clp->cl_rpcclient)) + return 0; + nfs_init_timeout_values(&timeparms, proto, timeo, retrans); + + clp->retrans_timeo = timeparms.to_initval; + clp->retrans_count = timeparms.to_retries; + + /* create transport and client */ + xprt = xprt_create_proto(proto, 
&clp->cl_addr, &timeparms); + if (IS_ERR(xprt)) { + dprintk("%s: cannot create RPC transport. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); + return PTR_ERR(xprt); + } + + /* Bind to a reserved port! */ + xprt->resvport = 1; + /* Create the client RPC handle */ + clnt = rpc_create_client(xprt, clp->cl_hostname, &nfs_program, + clp->rpc_ops->version, RPC_AUTH_UNIX); + if (IS_ERR(clnt)) { + dprintk("%s: cannot create RPC client. Error = %ld\n", + __FUNCTION__, PTR_ERR(clnt)); + return PTR_ERR(clnt); + } + + clnt->cl_intr = 1; + clnt->cl_softrtry = 1; + clp->cl_rpcclient = clnt; + return 0; +} + +/* + * Version 2 or 3 client destruction + */ +static void nfs_destroy_server(struct nfs_server *server) +{ + if (!IS_ERR(server->client_acl)) + rpc_shutdown_client(server->client_acl); + + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_down(); /* release rpc.lockd */ +} + +/* + * Version 2 or 3 lockd setup + */ +static int nfs_start_lockd(struct nfs_server *server) +{ + int error = 0; + + if (server->nfs_client->cl_nfsversion > 3) + goto out; + if (server->flags & NFS_MOUNT_NONLM) + goto out; + error = lockd_up(); + if (error < 0) + server->flags |= NFS_MOUNT_NONLM; + else + server->destroy = nfs_destroy_server; +out: + return error; +} + +/* + * Initialise an NFSv3 ACL client connection + */ +#ifdef CONFIG_NFS_V3_ACL +static void nfs_init_server_aclclient(struct nfs_server *server) +{ + if (server->nfs_client->cl_nfsversion != 3) + goto out_noacl; + if (server->flags & NFS_MOUNT_NOACL) + goto out_noacl; + + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + if (IS_ERR(server->client_acl)) + goto out_noacl; + + /* No errors! Assume that Sun nfsacls are supported */ + server->caps |= NFS_CAP_ACLS; + return; + +out_noacl: + server->caps &= ~NFS_CAP_ACLS; +} +#else +static inline void nfs_init_server_aclclient(struct nfs_server *server) +{ + server->flags &= ~NFS_MOUNT_NOACL; + server->caps &= ~NFS_CAP_ACLS; +} +#endif + +/* + * Create a general RPC client + */ +static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour) +{ + struct nfs_client *clp = server->nfs_client; + + server->client = rpc_clone_client(clp->cl_rpcclient); + if (IS_ERR(server->client)) { + dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__); + return PTR_ERR(server->client); + } + + if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) { + struct rpc_auth *auth; + + auth = rpcauth_create(pseudoflavour, server->client); + if (IS_ERR(auth)) { + dprintk("%s: couldn't create credcache!\n", __FUNCTION__); + return PTR_ERR(auth); + } + } + server->client->cl_softrtry = 0; + if (server->flags & NFS_MOUNT_SOFT) + server->client->cl_softrtry = 1; + + server->client->cl_intr = 0; + if (server->flags & NFS4_MOUNT_INTR) + server->client->cl_intr = 1; + + return 0; +} + +/* + * Initialise an NFS2 or NFS3 client + */ +static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data) +{ + int proto = (data->flags & NFS_MOUNT_TCP) ? 
IPPROTO_TCP : IPPROTO_UDP; + int error; + + if (clp->cl_cons_state == NFS_CS_READY) { + /* the client is already initialised */ + dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp); + return 0; + } + + /* Check NFS protocol revision and initialize RPC op vector */ + clp->rpc_ops = &nfs_v2_clientops; +#ifdef CONFIG_NFS_V3 + if (clp->cl_nfsversion == 3) + clp->rpc_ops = &nfs_v3_clientops; +#endif + /* + * Create a client RPC handle for doing FSSTAT with UNIX auth only + * - RFC 2623, sec 2.3.2 + */ + error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans, + RPC_AUTH_UNIX); + if (error < 0) + goto error; + nfs_mark_client_ready(clp, NFS_CS_READY); + return 0; + +error: + nfs_mark_client_ready(clp, error); + dprintk("<-- nfs_init_client() = xerror %d\n", error); + return error; +} + +/* + * Create a version 2 or 3 client + */ +static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data) +{ + struct nfs_client *clp; + int error, nfsvers = 2; + + dprintk("--> nfs_init_server()\n"); + +#ifdef CONFIG_NFS_V3 + if (data->flags & NFS_MOUNT_VER3) + nfsvers = 3; +#endif + + /* Allocate or find a client reference we can use */ + clp = nfs_get_client(data->hostname, &data->addr, nfsvers); + if (IS_ERR(clp)) { + dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); + return PTR_ERR(clp); + } + + error = nfs_init_client(clp, data); + if (error < 0) + goto error; + + server->nfs_client = clp; + + /* Initialise the client representation from the mount data */ + server->flags = data->flags & NFS_MOUNT_FLAGMASK; + + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + + server->acregmin = data->acregmin * HZ; + server->acregmax = data->acregmax * HZ; + server->acdirmin = data->acdirmin * HZ; + server->acdirmax = data->acdirmax * HZ; + + /* Start lockd here, before we might error out */ + error = nfs_start_lockd(server); + if (error < 0) + goto error; + + error = nfs_init_server_rpcclient(server, data->pseudoflavor); + if (error < 0) + goto error; + + server->namelen = data->namlen; + /* Create a client RPC handle for the NFSv3 ACL management interface */ + nfs_init_server_aclclient(server); + if (clp->cl_nfsversion == 3) { + if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) + server->namelen = NFS3_MAXNAMLEN; + server->caps |= NFS_CAP_READDIRPLUS; + } else { + if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) + server->namelen = NFS2_MAXNAMLEN; + } + + dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp); + return 0; + +error: + server->nfs_client = NULL; + nfs_put_client(clp); + dprintk("<-- nfs_init_server() = xerror %d\n", error); + return error; +} + +/* + * Load up the server record from information gained in an fsinfo record + */ +static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo) +{ + unsigned long max_rpc_payload; + + /* Work out a lot of parameters */ + if (server->rsize == 0) + server->rsize = nfs_block_size(fsinfo->rtpref, NULL); + if (server->wsize == 0) + server->wsize = nfs_block_size(fsinfo->wtpref, NULL); + + if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax) + server->rsize = nfs_block_size(fsinfo->rtmax, NULL); + if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax) + server->wsize = nfs_block_size(fsinfo->wtmax, NULL); + + max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); + if (server->rsize > max_rpc_payload) + server->rsize = max_rpc_payload; + if 
(server->rsize > NFS_MAX_FILE_IO_SIZE) + server->rsize = NFS_MAX_FILE_IO_SIZE; + server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; + + if (server->wsize > max_rpc_payload) + server->wsize = max_rpc_payload; + if (server->wsize > NFS_MAX_FILE_IO_SIZE) + server->wsize = NFS_MAX_FILE_IO_SIZE; + server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); + + server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); + if (server->dtsize > PAGE_CACHE_SIZE) + server->dtsize = PAGE_CACHE_SIZE; + if (server->dtsize > server->rsize) + server->dtsize = server->rsize; + + if (server->flags & NFS_MOUNT_NOAC) { + server->acregmin = server->acregmax = 0; + server->acdirmin = server->acdirmax = 0; + } + + server->maxfilesize = fsinfo->maxfilesize; + + /* We're airborne Set socket buffersize */ + rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); +} + +/* + * Probe filesystem information, including the FSID on v2/v3 + */ +static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) +{ + struct nfs_fsinfo fsinfo; + struct nfs_client *clp = server->nfs_client; + int error; + + dprintk("--> nfs_probe_fsinfo()\n"); + + if (clp->rpc_ops->set_capabilities != NULL) { + error = clp->rpc_ops->set_capabilities(server, mntfh); + if (error < 0) + goto out_error; + } + + fsinfo.fattr = fattr; + nfs_fattr_init(fattr); + error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); + if (error < 0) + goto out_error; + + nfs_server_set_fsinfo(server, &fsinfo); + + /* Get some general file system info */ + if (server->namelen == 0) { + struct nfs_pathconf pathinfo; + + pathinfo.fattr = fattr; + nfs_fattr_init(fattr); + + if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0) + server->namelen = pathinfo.max_namelen; + } + + dprintk("<-- nfs_probe_fsinfo() = 0\n"); + return 0; + +out_error: + dprintk("nfs_probe_fsinfo: error = %d\n", -error); + return error; +} + +/* + * Copy useful information when duplicating a server record + */ +static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) +{ + target->flags = source->flags; + target->acregmin = source->acregmin; + target->acregmax = source->acregmax; + target->acdirmin = source->acdirmin; + target->acdirmax = source->acdirmax; + target->caps = source->caps; +} + +/* + * Allocate and initialise a server record + */ +static struct nfs_server *nfs_alloc_server(void) +{ + struct nfs_server *server; + + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (!server) + return NULL; + + server->client = server->client_acl = ERR_PTR(-EINVAL); + + /* Zero out the NFS state stuff */ + INIT_LIST_HEAD(&server->client_link); + INIT_LIST_HEAD(&server->master_link); + + server->io_stats = nfs_alloc_iostats(); + if (!server->io_stats) { + kfree(server); + return NULL; + } + + return server; +} + +/* + * Free up a server record + */ +void nfs_free_server(struct nfs_server *server) +{ + dprintk("--> nfs_free_server()\n"); + + spin_lock(&nfs_client_lock); + list_del(&server->client_link); + list_del(&server->master_link); + spin_unlock(&nfs_client_lock); + + if (server->destroy != NULL) + server->destroy(server); + if (!IS_ERR(server->client)) + rpc_shutdown_client(server->client); + + nfs_put_client(server->nfs_client); + + nfs_free_iostats(server->io_stats); + kfree(server); + nfs_release_automount_timer(); + dprintk("<-- 
nfs_free_server()\n"); +} + +/* + * Create a version 2 or 3 volume record + * - keyed on server and FSID + */ +struct nfs_server *nfs_create_server(const struct nfs_mount_data *data, + struct nfs_fh *mntfh) +{ + struct nfs_server *server; + struct nfs_fattr fattr; + int error; + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* Get a client representation */ + error = nfs_init_server(server, data); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* Probe the root fh to retrieve its FSID */ + error = nfs_probe_fsinfo(server, mntfh, &fattr); + if (error < 0) + goto error; + if (!(fattr.valid & NFS_ATTR_FATTR)) { + error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); + if (error < 0) { + dprintk("nfs_create_server: getattr error = %d\n", -error); + goto error; + } + } + memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid)); + + dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor); + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + return server; + +error: + nfs_free_server(server); + return ERR_PTR(error); +} + +#ifdef CONFIG_NFS_V4 +/* + * Initialise an NFS4 client record + */ +static int nfs4_init_client(struct nfs_client *clp, + int proto, int timeo, int retrans, + rpc_authflavor_t authflavour) +{ + int error; + + if (clp->cl_cons_state == NFS_CS_READY) { + /* the client is initialised already */ + dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); + return 0; + } + + /* Check NFS protocol revision and initialize RPC op vector */ + clp->rpc_ops = &nfs_v4_clientops; + + error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour); + if (error < 0) + goto error; + + error = nfs_idmap_new(clp); + if (error < 0) { + dprintk("%s: failed to create idmapper. 
Error = %d\n", + __FUNCTION__, error); + goto error; + } + __set_bit(NFS_CS_IDMAP, &clp->cl_svcs_state); + + nfs_mark_client_ready(clp, NFS_CS_READY); + return 0; + +error: + nfs_mark_client_ready(clp, error); + dprintk("<-- nfs4_init_client() = xerror %d\n", error); + return error; +} + +/* + * Set up an NFS4 client + */ +static int nfs4_set_client(struct nfs_server *server, + const char *hostname, const struct sockaddr_in *addr, + rpc_authflavor_t authflavour, + int proto, int timeo, int retrans) +{ + struct nfs_client *clp; + int error; + + dprintk("--> nfs4_set_client()\n"); + + /* Allocate or find a client reference we can use */ + clp = nfs_get_client(hostname, addr, 4); + if (IS_ERR(clp)) { + error = PTR_ERR(clp); + goto error; + } + error = nfs4_init_client(clp, proto, timeo, retrans, authflavour); + if (error < 0) + goto error_put; + + server->nfs_client = clp; + dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); + return 0; + +error_put: + nfs_put_client(clp); +error: + dprintk("<-- nfs4_set_client() = xerror %d\n", error); + return error; +} + +/* + * Create a version 4 volume record + */ +static int nfs4_init_server(struct nfs_server *server, + const struct nfs4_mount_data *data, rpc_authflavor_t authflavour) +{ + int error; + + dprintk("--> nfs4_init_server()\n"); + + /* Initialise the client representation from the mount data */ + server->flags = data->flags & NFS_MOUNT_FLAGMASK; + server->caps |= NFS_CAP_ATOMIC_OPEN; + + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + + server->acregmin = data->acregmin * HZ; + server->acregmax = data->acregmax * HZ; + server->acdirmin = data->acdirmin * HZ; + server->acdirmax = data->acdirmax * HZ; + + error = nfs_init_server_rpcclient(server, authflavour); + + /* Done */ + dprintk("<-- nfs4_init_server() = %d\n", error); + return error; +} + +/* + * Create a version 4 volume record + * - keyed on server and FSID + */ +struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, + const char *hostname, + const struct sockaddr_in *addr, + const char *mntpath, + const char *ip_addr, + rpc_authflavor_t authflavour, + struct nfs_fh *mntfh) +{ + struct nfs_fattr fattr; + struct nfs_server *server; + int error; + + dprintk("--> nfs4_create_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* Get a client record */ + error = nfs4_set_client(server, hostname, addr, authflavour, + data->proto, data->timeo, data->retrans); + if (error < 0) + goto error; + + /* set up the general RPC client */ + error = nfs4_init_server(server, data, authflavour); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* Probe the root fh to retrieve its FSID */ + error = nfs4_path_walk(server, mntfh, mntpath); + if (error < 0) + goto error; + + dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor); + dprintk("Mount FH: %d\n", mntfh->size); + + error = nfs_probe_fsinfo(server, mntfh, &fattr); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + 
dprintk("<-- nfs4_create_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_server() = error %d\n", error); + return ERR_PTR(error); +} + +/* + * Create an NFS4 referral server record + */ +struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, + struct nfs_fh *fh) +{ + struct nfs_client *parent_client; + struct nfs_server *server, *parent_server; + struct nfs_fattr fattr; + int error; + + dprintk("--> nfs4_create_referral_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + parent_server = NFS_SB(data->sb); + parent_client = parent_server->nfs_client; + + /* Get a client representation. + * Note: NFSv4 always uses TCP, */ + error = nfs4_set_client(server, data->hostname, data->addr, + data->authflavor, + parent_server->client->cl_xprt->prot, + parent_client->retrans_timeo, + parent_client->retrans_count); + + /* Initialise the client representation from the parent server */ + nfs_server_copy_userdata(server, parent_server); + server->caps |= NFS_CAP_ATOMIC_OPEN; + + error = nfs_init_server_rpcclient(server, data->authflavor); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* probe the filesystem info for this server filesystem */ + error = nfs_probe_fsinfo(server, fh, &fattr); + if (error < 0) + goto error; + + dprintk("Referral FSID: %llx:%llx\n", + server->fsid.major, server->fsid.minor); + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + + dprintk("<-- nfs_create_referral_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_referral_server() = error %d\n", error); + return ERR_PTR(error); +} + +#endif /* CONFIG_NFS_V4 */ + +/* + * Clone an NFS2, NFS3 or NFS4 server record + */ +struct nfs_server *nfs_clone_server(struct nfs_server *source, + struct nfs_fh *fh, + struct nfs_fattr *fattr) +{ + struct nfs_server *server; + struct nfs_fattr fattr_fsinfo; + int error; + + dprintk("--> nfs_clone_server(,%llx:%llx,)\n", + fattr->fsid.major, fattr->fsid.minor); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* Copy data from the source */ + server->nfs_client = source->nfs_client; + atomic_inc(&server->nfs_client->cl_count); + nfs_server_copy_userdata(server, source); + + server->fsid = fattr->fsid; + + error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor); + if (error < 0) + goto out_free_server; + if (!IS_ERR(source->client_acl)) + nfs_init_server_aclclient(server); + + /* probe the filesystem info for this server filesystem */ + error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo); + if (error < 0) + goto out_free_server; + + dprintk("Cloned FSID: %llx:%llx\n", + server->fsid.major, server->fsid.minor); + + error = nfs_start_lockd(server); + if (error < 0) + goto out_free_server; + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + + dprintk("<-- nfs_clone_server() = %p\n", server); + return server; + +out_free_server: + nfs_free_server(server); + dprintk("<-- nfs_clone_server() = error 
%d\n", error); + return ERR_PTR(error); +} + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry *proc_fs_nfs; + +static int nfs_server_list_open(struct inode *inode, struct file *file); +static void *nfs_server_list_start(struct seq_file *p, loff_t *pos); +static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos); +static void nfs_server_list_stop(struct seq_file *p, void *v); +static int nfs_server_list_show(struct seq_file *m, void *v); + +static struct seq_operations nfs_server_list_ops = { + .start = nfs_server_list_start, + .next = nfs_server_list_next, + .stop = nfs_server_list_stop, + .show = nfs_server_list_show, +}; + +static struct file_operations nfs_server_list_fops = { + .open = nfs_server_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int nfs_volume_list_open(struct inode *inode, struct file *file); +static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos); +static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos); +static void nfs_volume_list_stop(struct seq_file *p, void *v); +static int nfs_volume_list_show(struct seq_file *m, void *v); + +static struct seq_operations nfs_volume_list_ops = { + .start = nfs_volume_list_start, + .next = nfs_volume_list_next, + .stop = nfs_volume_list_stop, + .show = nfs_volume_list_show, +}; + +static struct file_operations nfs_volume_list_fops = { + .open = nfs_volume_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* + * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which + * we're dealing + */ +static int nfs_server_list_open(struct inode *inode, struct file *file) +{ + struct seq_file *m; + int ret; + + ret = seq_open(file, &nfs_server_list_ops); + if (ret < 0) + return ret; + + m = file->private_data; + m->private = PDE(inode)->data; + + return 0; +} + +/* + * set up the iterator to start reading from the server list and return the first item + */ +static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) +{ + struct list_head *_p; + loff_t pos = *_pos; + + /* lock the list against modification */ + spin_lock(&nfs_client_lock); + + /* allow for the header line */ + if (!pos) + return SEQ_START_TOKEN; + pos--; + + /* find the n'th element in the list */ + list_for_each(_p, &nfs_client_list) + if (!pos--) + break; + + return _p != &nfs_client_list ? _p : NULL; +} + +/* + * move to next server + */ +static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) +{ + struct list_head *_p; + + (*pos)++; + + _p = v; + _p = (v == SEQ_START_TOKEN) ? nfs_client_list.next : _p->next; + + return _p != &nfs_client_list ? 
_p : NULL; +} + +/* + * clean up after reading from the transports list + */ +static void nfs_server_list_stop(struct seq_file *p, void *v) +{ + spin_unlock(&nfs_client_lock); +} + +/* + * display a header line followed by a load of call lines + */ +static int nfs_server_list_show(struct seq_file *m, void *v) +{ + struct nfs_client *clp; + + /* display header on line 1 */ + if (v == SEQ_START_TOKEN) { + seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); + return 0; + } + + /* display one transport per line on subsequent lines */ + clp = list_entry(v, struct nfs_client, cl_share_link); + + seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n", + clp->cl_nfsversion, + NIPQUAD(clp->cl_addr.sin_addr), + ntohs(clp->cl_addr.sin_port), + atomic_read(&clp->cl_count), + clp->cl_hostname); + + return 0; +} + +/* + * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes + */ +static int nfs_volume_list_open(struct inode *inode, struct file *file) +{ + struct seq_file *m; + int ret; + + ret = seq_open(file, &nfs_volume_list_ops); + if (ret < 0) + return ret; + + m = file->private_data; + m->private = PDE(inode)->data; + + return 0; +} + +/* + * set up the iterator to start reading from the volume list and return the first item + */ +static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) +{ + struct list_head *_p; + loff_t pos = *_pos; + + /* lock the list against modification */ + spin_lock(&nfs_client_lock); + + /* allow for the header line */ + if (!pos) + return SEQ_START_TOKEN; + pos--; + + /* find the n'th element in the list */ + list_for_each(_p, &nfs_volume_list) + if (!pos--) + break; + + return _p != &nfs_volume_list ? _p : NULL; +} + +/* + * move to next volume + */ +static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) +{ + struct list_head *_p; + + (*pos)++; + + _p = v; + _p = (v == SEQ_START_TOKEN) ? nfs_volume_list.next : _p->next; + + return _p != &nfs_volume_list ? 
_p : NULL; +} + +/* + * clean up after reading from the transports list + */ +static void nfs_volume_list_stop(struct seq_file *p, void *v) +{ + spin_unlock(&nfs_client_lock); +} + +/* + * display a header line followed by a load of call lines + */ +static int nfs_volume_list_show(struct seq_file *m, void *v) +{ + struct nfs_server *server; + struct nfs_client *clp; + char dev[8], fsid[17]; + + /* display header on line 1 */ + if (v == SEQ_START_TOKEN) { + seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); + return 0; + } + /* display one transport per line on subsequent lines */ + server = list_entry(v, struct nfs_server, master_link); + clp = server->nfs_client; + + snprintf(dev, 8, "%u:%u", + MAJOR(server->s_dev), MINOR(server->s_dev)); + + snprintf(fsid, 17, "%llx:%llx", + server->fsid.major, server->fsid.minor); + + seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s %s\n", + clp->cl_nfsversion, + NIPQUAD(clp->cl_addr.sin_addr), + ntohs(clp->cl_addr.sin_port), + dev, + fsid, + nfs_server_fscache_state(server)); + + return 0; +} + +/* + * initialise the /proc/fs/nfsfs/ directory + */ +int __init nfs_fs_proc_init(void) +{ + struct proc_dir_entry *p; + + proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs); + if (!proc_fs_nfs) + goto error_0; + + proc_fs_nfs->owner = THIS_MODULE; + + /* a file of servers with which we're dealing */ + p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs); + if (!p) + goto error_1; + + p->proc_fops = &nfs_server_list_fops; + p->owner = THIS_MODULE; + + /* a file of volumes that we have mounted */ + p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs); + if (!p) + goto error_2; + + p->proc_fops = &nfs_volume_list_fops; + p->owner = THIS_MODULE; + return 0; + +error_2: + remove_proc_entry("servers", proc_fs_nfs); +error_1: + remove_proc_entry("nfsfs", proc_root_fs); +error_0: + return -ENOMEM; +} + +/* + * clean up the /proc/fs/nfsfs/ directory + */ +void nfs_fs_proc_exit(void) +{ + remove_proc_entry("volumes", proc_fs_nfs); + remove_proc_entry("servers", proc_fs_nfs); + remove_proc_entry("nfsfs", proc_root_fs); +} + +#endif /* CONFIG_PROC_FS */ diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 9540a31..5713367 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -18,6 +18,7 @@ #include #include "nfs4_fs.h" #include "delegation.h" +#include "internal.h" static struct nfs_delegation *nfs_alloc_delegation(void) { @@ -52,7 +53,7 @@ static int nfs_delegation_claim_locks(st case -NFS4ERR_EXPIRED: /* kill_proc(fl->fl_pid, SIGLOST, 1); */ case -NFS4ERR_STALE_CLIENTID: - nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs4_state); + nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client); goto out_err; } } @@ -114,7 +115,7 @@ void nfs_inode_reclaim_delegation(struct */ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) { - struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int status = 0; @@ -145,7 +146,7 @@ int nfs_inode_set_delegation(struct inod sizeof(delegation->stateid)) != 0 || delegation->type != nfsi->delegation->type) { printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n", - __FUNCTION__, NIPQUAD(clp->cl_addr)); + __FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr)); status = -EIO; } } @@ -176,7 +177,7 @@ static void nfs_msync_inode(struct inode */ int __nfs_inode_return_delegation(struct inode *inode) { - struct nfs4_client *clp = 
NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int res = 0; @@ -208,7 +209,7 @@ int __nfs_inode_return_delegation(struct */ void nfs_return_all_delegations(struct super_block *sb) { - struct nfs4_client *clp = NFS_SB(sb)->nfs4_state; + struct nfs_client *clp = NFS_SB(sb)->nfs_client; struct nfs_delegation *delegation; struct inode *inode; @@ -232,7 +233,7 @@ restart: int nfs_do_expire_all_delegations(void *ptr) { - struct nfs4_client *clp = ptr; + struct nfs_client *clp = ptr; struct nfs_delegation *delegation; struct inode *inode; @@ -254,11 +255,11 @@ restart: } out: spin_unlock(&clp->cl_lock); - nfs4_put_client(clp); + nfs_put_client(clp); module_put_and_exit(0); } -void nfs_expire_all_delegations(struct nfs4_client *clp) +void nfs_expire_all_delegations(struct nfs_client *clp) { struct task_struct *task; @@ -266,17 +267,17 @@ void nfs_expire_all_delegations(struct n atomic_inc(&clp->cl_count); task = kthread_run(nfs_do_expire_all_delegations, clp, "%u.%u.%u.%u-delegreturn", - NIPQUAD(clp->cl_addr)); + NIPQUAD(clp->cl_addr.sin_addr)); if (!IS_ERR(task)) return; - nfs4_put_client(clp); + nfs_put_client(clp); module_put(THIS_MODULE); } /* * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. */ -void nfs_handle_cb_pathdown(struct nfs4_client *clp) +void nfs_handle_cb_pathdown(struct nfs_client *clp) { struct nfs_delegation *delegation; struct inode *inode; @@ -299,7 +300,7 @@ restart: struct recall_threadargs { struct inode *inode; - struct nfs4_client *clp; + struct nfs_client *clp; const nfs4_stateid *stateid; struct completion started; @@ -310,7 +311,7 @@ static int recall_thread(void *data) { struct recall_threadargs *args = (struct recall_threadargs *)data; struct inode *inode = igrab(args->inode); - struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; @@ -371,7 +372,7 @@ out_module_put: /* * Retrieve the inode associated with a delegation */ -struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle) +struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle) { struct nfs_delegation *delegation; struct inode *res = NULL; @@ -389,7 +390,7 @@ struct inode *nfs_delegation_find_inode( /* * Mark all delegations as needing to be reclaimed */ -void nfs_delegation_mark_reclaim(struct nfs4_client *clp) +void nfs_delegation_mark_reclaim(struct nfs_client *clp) { struct nfs_delegation *delegation; spin_lock(&clp->cl_lock); @@ -401,7 +402,7 @@ void nfs_delegation_mark_reclaim(struct /* * Reap all unclaimed delegations after reboot recovery is done */ -void nfs_delegation_reap_unclaimed(struct nfs4_client *clp) +void nfs_delegation_reap_unclaimed(struct nfs_client *clp) { struct nfs_delegation *delegation, *n; LIST_HEAD(head); @@ -423,7 +424,7 @@ void nfs_delegation_reap_unclaimed(struc int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) { - struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int res = 0; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 3858694..2cfd4b2 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -29,13 +29,13 @@ void nfs_inode_reclaim_delegation(struct int 
__nfs_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); -struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); +struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); void nfs_return_all_delegations(struct super_block *sb); -void nfs_expire_all_delegations(struct nfs4_client *clp); -void nfs_handle_cb_pathdown(struct nfs4_client *clp); +void nfs_expire_all_delegations(struct nfs_client *clp); +void nfs_handle_cb_pathdown(struct nfs_client *clp); -void nfs_delegation_mark_reclaim(struct nfs4_client *clp); -void nfs_delegation_reap_unclaimed(struct nfs4_client *clp); +void nfs_delegation_mark_reclaim(struct nfs_client *clp); +void nfs_delegation_reap_unclaimed(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e7ffb4d..7bed6c0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -31,6 +31,7 @@ #include #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -870,14 +871,14 @@ int nfs_is_exclusive_create(struct inode return (nd->intent.open.flags & O_EXCL) != 0; } -static inline int nfs_reval_fsid(struct inode *dir, - struct nfs_fh *fh, struct nfs_fattr *fattr) +static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir, + struct nfs_fh *fh, struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) /* Revalidate fsid on root dir */ - return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode); + return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode); return 0; } @@ -913,7 +914,7 @@ static struct dentry *nfs_lookup(struct res = ERR_PTR(error); goto out_unlock; } - error = nfs_reval_fsid(dir, &fhandle, &fattr); + error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr); if (error < 0) { res = ERR_PTR(error); goto out_unlock; @@ -922,8 +923,9 @@ static struct dentry *nfs_lookup(struct res = (struct dentry *)inode; if (IS_ERR(res)) goto out_unlock; + no_entry: - res = d_add_unique(dentry, inode); + res = d_materialise_unique(dentry, inode); if (res != NULL) dentry = res; nfs_renew_times(dentry); @@ -1117,11 +1119,13 @@ static struct dentry *nfs_readdir_lookup dput(dentry); return NULL; } - alias = d_add_unique(dentry, inode); + + alias = d_materialise_unique(dentry, inode); if (alias != NULL) { dput(dentry); dentry = alias; } + nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); return dentry; @@ -1147,7 +1151,7 @@ int nfs_instantiate(struct dentry *dentr } if (!(fattr->valid & NFS_ATTR_FATTR)) { struct nfs_server *server = NFS_SB(dentry->d_sb); - error = server->rpc_ops->getattr(server, fhandle, fattr); + error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); if (error < 0) goto out_err; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index cc2b874..855bb97 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -27,12 +27,14 @@ #include #include #include #include +#include #include #include #include "delegation.h" #include "iostat.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -111,7 +113,7 @@ nfs_file_open(struct inode *inode, struc nfs_inc_stats(inode, NFSIOS_VFSOPEN); lock_kernel(); - res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp); + res = NFS_PROTO(inode)->file_open(inode, filp); 
unlock_kernel(); return res; } @@ -249,6 +251,10 @@ nfs_file_mmap(struct file * file, struct status = nfs_revalidate_mapping(inode, file->f_mapping); if (!status) status = generic_file_mmap(file, vma); + + if (status == 0) + nfs_fscache_install_vm_ops(inode, vma); + return status; } @@ -308,13 +314,35 @@ static void nfs_invalidate_page(struct p /* Cancel any unstarted writes on this page */ if (offset == 0) nfs_sync_inode_wait(inode, page->index, 1, FLUSH_INVALIDATE); + + nfs_fscache_invalidate_page(page, inode, offset); + + /* we can do this here as the bits are only set with the page lock + * held, and our caller is holding that */ + if (!page->private) + ClearPagePrivate(page); } static int nfs_release_page(struct page *page, gfp_t gfp) { - return !nfs_wb_page(page->mapping->host, page); + int error = nfs_wb_page(page->mapping->host, page); + + if (error == 0) { + nfs_fscache_release_page(page); + + /* may have been set due to either caching or writing */ + ClearPagePrivate(page); + } + + /* releasepage() returns true/false */ + return (error == 0) ? 1 : 0; } +/* + * Since we use page->private for our own nefarious purposes when using + * fscache, we have to override extra address space ops to prevent fs/buffer.c + * from getting confused, even though we may not have asked its opinion + */ const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, @@ -328,6 +356,9 @@ const struct address_space_operations nf #ifdef CONFIG_NFS_DIRECTIO .direct_IO = nfs_direct_IO, #endif +#ifdef CONFIG_NFS_FSCACHE + .sync_page = block_sync_page, +#endif }; /* diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c new file mode 100644 index 0000000..5400e6a --- /dev/null +++ b/fs/nfs/fscache.c @@ -0,0 +1,349 @@ +/* fscache.c: NFS filesystem cache interface + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +/* + * Sysctl variables + */ +atomic_t nfs_fscache_to_pages; +atomic_t nfs_fscache_from_pages; +atomic_t nfs_fscache_uncache_page; +int nfs_fscache_from_error; +int nfs_fscache_to_error; + +#define NFSDBG_FACILITY NFSDBG_FSCACHE + +/* the auxiliary data in the cache (used for coherency management) */ +struct nfs_fh_auxdata { + struct timespec i_mtime; + struct timespec i_ctime; + loff_t i_size; +}; + +static struct fscache_netfs_operations nfs_cache_ops = { +}; + +struct fscache_netfs nfs_cache_netfs = { + .name = "nfs", + .version = 0, + .ops = &nfs_cache_ops, +}; + +static const uint8_t nfs_cache_ipv6_wrapper_for_ipv4[12] = { + [0 ... 9] = 0x00, + [10 ... 
11] = 0xff +}; + +struct nfs_server_key { + uint16_t nfsversion; + uint16_t port; + union { + struct { + uint8_t ipv6wrapper[12]; + struct in_addr addr; + } ipv4_addr; + struct in6_addr ipv6_addr; + }; +}; + +static uint16_t nfs_server_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct nfs_client *clp = cookie_netfs_data; + struct nfs_server_key *key = buffer; + uint16_t len = 0; + + key->nfsversion = clp->cl_nfsversion; + + switch (clp->cl_addr.sin_family) { + case AF_INET: + key->port = clp->cl_addr.sin_port; + + memcpy(&key->ipv4_addr.ipv6wrapper, + &nfs_cache_ipv6_wrapper_for_ipv4, + sizeof(key->ipv4_addr.ipv6wrapper)); + memcpy(&key->ipv4_addr.addr, + &clp->cl_addr.sin_addr, + sizeof(key->ipv4_addr.addr)); + len = sizeof(struct nfs_server_key); + break; + + case AF_INET6: + key->port = clp->cl_addr.sin_port; + + memcpy(&key->ipv6_addr, + &clp->cl_addr.sin_addr, + sizeof(key->ipv6_addr)); + len = sizeof(struct nfs_server_key); + break; + + default: + len = 0; + printk(KERN_WARNING "NFS: Unknown network family '%d'\n", + clp->cl_addr.sin_family); + break; + } + + return len; +} + +/* + * the root index for the filesystem is defined by nfsd IP address and ports + */ +struct fscache_cookie_def nfs_cache_server_index_def = { + .name = "NFS.servers", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = nfs_server_get_key, +}; + +static uint16_t nfs_fh_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct nfs_inode *nfsi = cookie_netfs_data; + uint16_t nsize; + + /* set the file handle */ + nsize = nfsi->fh.size; + memcpy(buffer, nfsi->fh.data, nsize); + return nsize; +} + +/* + * indication of pages that now have cache metadata retained + * - this function should mark the specified pages as now being cached + */ +static void nfs_fh_mark_pages_cached(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec) +{ + struct nfs_inode *nfsi = cookie_netfs_data; + unsigned long loop; + + dprintk("NFS: nfs_fh_mark_pages_cached: nfs_inode 0x%p pages %ld\n", + nfsi, cached_pvec->nr); + + for (loop = 0; loop < cached_pvec->nr; loop++) + SetPageNfsCached(cached_pvec->pages[loop]); +} + +/* + * get an extra reference on a read context + * - this function can be absent if the completion function doesn't + * require a context + */ +static void nfs_fh_get_context(void *cookie_netfs_data, void *context) +{ + get_nfs_open_context(context); +} + +/* + * release an extra reference on a read context + * - this function can be absent if the completion function doesn't + * require a context + */ +static void nfs_fh_put_context(void *cookie_netfs_data, void *context) +{ + if (context) + put_nfs_open_context(context); +} + +/* + * indication the cookie is no longer uncached + * - this function is called when the backing store currently caching a cookie + * is removed + * - the netfs should use this to clean up any markers indicating cached pages + * - this is mandatory for any object that may have data + */ +static void nfs_fh_now_uncached(void *cookie_netfs_data) +{ + struct nfs_inode *nfsi = cookie_netfs_data; + struct pagevec pvec; + pgoff_t first; + int loop, nr_pages; + + pagevec_init(&pvec, 0); + first = 0; + + dprintk("NFS: nfs_fh_now_uncached: nfs_inode 0x%p\n", nfsi); + + for (;;) { + /* grab a bunch of pages to clean */ + nr_pages = pagevec_lookup(&pvec, + nfsi->vfs_inode.i_mapping, + first, + PAGEVEC_SIZE - pagevec_count(&pvec)); + if (!nr_pages) + break; + + for (loop = 0; loop < nr_pages; loop++) + 
ClearPageNfsCached(pvec.pages[loop]); + + first = pvec.pages[nr_pages - 1]->index + 1; + + pvec.nr = nr_pages; + pagevec_release(&pvec); + cond_resched(); + } +} + +/*****************************************************************************/ +/* + * get certain file attributes from the netfs data + * - this function can be absent for an index + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ +static void nfs_fh_get_attr(const void *cookie_netfs_data, uint64_t *size) +{ + const struct nfs_inode *nfsi = cookie_netfs_data; + + *size = nfsi->vfs_inode.i_size; +} + +/*****************************************************************************/ +/* + * get the auxilliary data from netfs data + * - this function can be absent if the index carries no state data + * - should store the auxilliary data in the buffer + * - should return the amount of amount stored + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ +static uint16_t nfs_fh_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + struct nfs_fh_auxdata auxdata; + const struct nfs_inode *nfsi = cookie_netfs_data; + + auxdata.i_size = nfsi->vfs_inode.i_size; + auxdata.i_mtime = nfsi->vfs_inode.i_mtime; + auxdata.i_ctime = nfsi->vfs_inode.i_ctime; + + if (bufmax > sizeof(auxdata)) + bufmax = sizeof(auxdata); + + memcpy(buffer, &auxdata, bufmax); + return bufmax; +} + +/*****************************************************************************/ +/* + * consult the netfs about the state of an object + * - this function can be absent if the index carries no state data + * - the netfs data from the cookie being used as the target is + * presented, as is the auxilliary data + */ +static fscache_checkaux_t nfs_fh_check_aux(void *cookie_netfs_data, + const void *data, uint16_t datalen) +{ + struct nfs_fh_auxdata auxdata; + struct nfs_inode *nfsi = cookie_netfs_data; + + if (datalen > sizeof(auxdata)) + return FSCACHE_CHECKAUX_OBSOLETE; + + auxdata.i_size = nfsi->vfs_inode.i_size; + auxdata.i_mtime = nfsi->vfs_inode.i_mtime; + auxdata.i_ctime = nfsi->vfs_inode.i_ctime; + + if (memcmp(data, &auxdata, datalen) != 0) + return FSCACHE_CHECKAUX_OBSOLETE; + + return FSCACHE_CHECKAUX_OKAY; +} + +/* + * the primary index for each server is simply made up of a series of NFS file + * handles + */ +struct fscache_cookie_def nfs_cache_fh_index_def = { + .name = "NFS.fh", + .type = FSCACHE_COOKIE_TYPE_DATAFILE, + .get_key = nfs_fh_get_key, + .get_attr = nfs_fh_get_attr, + .get_aux = nfs_fh_get_aux, + .check_aux = nfs_fh_check_aux, + .get_context = nfs_fh_get_context, + .put_context = nfs_fh_put_context, + .mark_pages_cached = nfs_fh_mark_pages_cached, + .now_uncached = nfs_fh_now_uncached, +}; + +static int nfs_file_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + wait_on_page_fs_misc(page); + return 0; +} + +struct vm_operations_struct nfs_fs_vm_operations = { + .nopage = filemap_nopage, + .populate = filemap_populate, + .page_mkwrite = nfs_file_page_mkwrite, +}; + +/* + * handle completion of a page being stored in the cache + */ +void nfs_readpage_to_fscache_complete(struct page *page, void *data, int error) +{ + dfprintk(FSCACHE, + "NFS: readpage_to_fscache_complete (p:%p(i:%lx f:%lx)/%d)\n", + page, page->index, page->flags, error); + + end_page_fs_misc(page); +} + +/* + * handle completion of a page being read from the cache + * - called in process (keventd) context + */ 
+void nfs_readpage_from_fscache_complete(struct page *page, + void *context, + int error) +{ + dfprintk(FSCACHE, + "NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n", + page, context, error); + + /* if the read completes with an error, we just unlock the page and let + * the VM reissue the readpage */ + if (!error) { + SetPageUptodate(page); + unlock_page(page); + } else { + error = nfs_readpage_async(context, page->mapping->host, page); + if (error) + unlock_page(page); + } +} + +/* + * handle completion of a page being read from the cache + * - really need to synchronise the end of writeback, probably using a page + * flag, but for the moment we disable caching on writable files + */ +void nfs_writepage_to_fscache_complete(struct page *page, + void *data, + int error) +{ +} diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h new file mode 100644 index 0000000..69f0f40 --- /dev/null +++ b/fs/nfs/fscache.h @@ -0,0 +1,466 @@ +/* fscache.h: NFS filesystem cache interface definitions + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _NFS_FSCACHE_H +#define _NFS_FSCACHE_H + +#include +#include +#include + +#ifdef CONFIG_NFS_FSCACHE +#include + +extern struct fscache_netfs nfs_cache_netfs; +extern struct fscache_cookie_def nfs_cache_server_index_def; +extern struct fscache_cookie_def nfs_cache_fh_index_def; +extern struct vm_operations_struct nfs_fs_vm_operations; + +extern void nfs_invalidatepage(struct page *, unsigned long); +extern int nfs_releasepage(struct page *, gfp_t); + +extern atomic_t nfs_fscache_to_pages; +extern atomic_t nfs_fscache_from_pages; +extern atomic_t nfs_fscache_uncache_page; +extern int nfs_fscache_from_error; +extern int nfs_fscache_to_error; + +/* + * register NFS for caching + */ +static inline int nfs_fscache_register(void) +{ + return fscache_register_netfs(&nfs_cache_netfs); +} + +/* + * unregister NFS for caching + */ +static inline void nfs_fscache_unregister(void) +{ + fscache_unregister_netfs(&nfs_cache_netfs); +} + +/* + * get the per-client index cookie for an NFS client if the appropriate mount + * flag was set + * - we always try and get an index cookie for the client, but get filehandle + * cookies on a per-superblock basis, depending on the mount flags + */ +static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) +{ + /* create a cache index for looking up filehandles */ + clp->fscache = fscache_acquire_cookie(nfs_cache_netfs.primary_index, + &nfs_cache_server_index_def, + clp); + dfprintk(FSCACHE,"NFS: get client cookie (0x%p/0x%p)\n", + clp, clp->fscache); +} + +/* + * dispose of a per-client cookie + */ +static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) +{ + dfprintk(FSCACHE,"NFS: releasing client cookie (0x%p/0x%p)\n", + clp, clp->fscache); + + fscache_relinquish_cookie(clp->fscache, 0); + clp->fscache = NULL; +} + +/* + * indicate the client caching state as readable text + */ +static inline const char *nfs_server_fscache_state(struct nfs_server *server) +{ + if (server->nfs_client->fscache && (server->flags & NFS_MOUNT_FSCACHE)) + return "yes"; + return "no "; +} + +/* + * get the per-filehandle cookie for an NFS inode + */ +static inline void 
nfs_fscache_get_fh_cookie(struct super_block *sb, + struct nfs_inode *nfsi, + int maycache) +{ + nfsi->fscache = NULL; + if (maycache && (NFS_SB(sb)->flags & NFS_MOUNT_FSCACHE)) { + nfsi->fscache = fscache_acquire_cookie( + NFS_SB(sb)->nfs_client->fscache, + &nfs_cache_fh_index_def, + nfsi); + + fscache_set_i_size(nfsi->fscache, nfsi->vfs_inode.i_size); + + dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n", + sb, nfsi, nfsi->fscache); + } +} + +/* + * change the filesize associated with a per-filehandle cookie + */ +static inline void nfs_fscache_set_size(struct nfs_server *server, + struct nfs_inode *nfsi, + loff_t i_size) +{ + fscache_set_i_size(nfsi->fscache, i_size); +} + +/* + * replace a per-filehandle cookie due to revalidation detecting a file having + * changed on the server + */ +static inline void nfs_fscache_renew_fh_cookie(struct nfs_server *server, + struct nfs_inode *nfsi) +{ + struct fscache_cookie *old = nfsi->fscache; + + if (nfsi->fscache) { + /* retire the current fscache cache and get a new one */ + fscache_relinquish_cookie(nfsi->fscache, 1); + + nfsi->fscache = fscache_acquire_cookie( + server->nfs_client->fscache, + &nfs_cache_fh_index_def, + nfsi); + fscache_set_i_size(nfsi->fscache, nfsi->vfs_inode.i_size); + + dfprintk(FSCACHE, + "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n", + server, nfsi, old, nfsi->fscache); + } +} + +/* + * release a per-filehandle cookie + */ +static inline void nfs_fscache_release_fh_cookie(struct nfs_server *server, + struct nfs_inode *nfsi) +{ + dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", + nfsi, nfsi->fscache); + + fscache_relinquish_cookie(nfsi->fscache, 0); + nfsi->fscache = NULL; +} + +/* + * retire a per-filehandle cookie, destroying the data attached to it + */ +static inline void nfs_fscache_zap_fh_cookie(struct nfs_server *server, + struct nfs_inode *nfsi) +{ + dfprintk(FSCACHE,"NFS: zapping cookie (0x%p/0x%p)\n", + nfsi, nfsi->fscache); + + fscache_relinquish_cookie(nfsi->fscache, 1); + nfsi->fscache = NULL; +} + +/* + * turn off the cache with regard to a filehandle cookie if opened for writing, + * invalidating all the pages in the page cache relating to the associated + * inode to clear the per-page caching + */ +static inline void nfs_fscache_disable_fh_cookie(struct inode *inode) +{ + if (NFS_I(inode)->fscache) { + dfprintk(FSCACHE, + "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); + + /* Need to invalided any mapped pages that were read in before + * turning off the cache. 
+ */ + if (inode->i_mapping && inode->i_mapping->nrpages) + invalidate_inode_pages2(inode->i_mapping); + + nfs_fscache_zap_fh_cookie(NFS_SERVER(inode), NFS_I(inode)); + } +} + +/* + * install the VM ops for mmap() of an NFS file so that we can hold up writes + * to pages on shared writable mappings until the store to the cache is + * complete + */ +static inline void nfs_fscache_install_vm_ops(struct inode *inode, + struct vm_area_struct *vma) +{ + if (NFS_I(inode)->fscache) + vma->vm_ops = &nfs_fs_vm_operations; +} + +/* + * release the caching state associated with a page + */ +static void nfs_fscache_release_page(struct page *page) +{ + if (PageNfsCached(page)) { + struct nfs_inode *nfsi = NFS_I(page->mapping->host); + + BUG_ON(nfsi->fscache == NULL); + + dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", + nfsi->fscache, page, nfsi); + + wait_on_page_fs_misc(page); + fscache_uncache_page(nfsi->fscache, page); + atomic_inc(&nfs_fscache_uncache_page); + ClearPageNfsCached(page); + } +} + +/* + * release the caching state associated with a page if undergoing complete page + * invalidation + */ +static inline void nfs_fscache_invalidate_page(struct page *page, + struct inode *inode, + unsigned long offset) +{ + if (PageNfsCached(page)) { + struct nfs_inode *nfsi = NFS_I(page->mapping->host); + + BUG_ON(!nfsi->fscache); + + dfprintk(FSCACHE, + "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n", + nfsi->fscache, page, nfsi); + + if (offset == 0) { + BUG_ON(!PageLocked(page)); + if (!PageWriteback(page)) + nfs_fscache_release_page(page); + } + } +} + +/* + * store a newly fetched page in fscache + */ +extern void nfs_readpage_to_fscache_complete(struct page *, void *, int); + +static inline void nfs_readpage_to_fscache(struct inode *inode, + struct page *page, + int sync) +{ + int ret; + + if (PageNfsCached(page)) { + dfprintk(FSCACHE, + "NFS: " + "readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n", + NFS_I(inode)->fscache, page, page->index, page->flags, + sync); + + if (TestSetPageFsMisc(page)) + BUG(); + + ret = fscache_write_page(NFS_I(inode)->fscache, page, + nfs_readpage_to_fscache_complete, + NULL, GFP_KERNEL); + dfprintk(FSCACHE, + "NFS: " + "readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n", + page, page->index, page->flags, ret); + + if (ret != 0) { + fscache_uncache_page(NFS_I(inode)->fscache, page); + atomic_inc(&nfs_fscache_uncache_page); + ClearPageNfsCached(page); + end_page_fs_misc(page); + nfs_fscache_to_error = ret; + } else { + atomic_inc(&nfs_fscache_to_pages); + } + } +} + +/* + * retrieve a page from fscache + */ +extern void nfs_readpage_from_fscache_complete(struct page *, void *, int); + +static inline +int nfs_readpage_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct page *page) +{ + int ret; + + if (!NFS_I(inode)->fscache) + return 1; + + dfprintk(FSCACHE, + "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n", + NFS_I(inode)->fscache, page, page->index, page->flags, inode); + + ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache, + page, + nfs_readpage_from_fscache_complete, + ctx, + GFP_KERNEL); + + switch (ret) { + case 0: /* read BIO submitted (page in fscache) */ + dfprintk(FSCACHE, + "NFS: readpage_from_fscache: BIO submitted\n"); + atomic_inc(&nfs_fscache_from_pages); + return ret; + + case -ENOBUFS: /* inode not in cache */ + case -ENODATA: /* page not in cache */ + dfprintk(FSCACHE, + "NFS: readpage_from_fscache error %d\n", ret); + return 1; + + default: + dfprintk(FSCACHE, "NFS: readpage_from_fscache 
%d\n", ret); + nfs_fscache_from_error = ret; + } + return ret; +} + +/* + * retrieve a set of pages from fscache + */ +static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + int ret, npages = *nr_pages; + + if (!NFS_I(inode)->fscache) + return 1; + + dfprintk(FSCACHE, + "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n", + NFS_I(inode)->fscache, *nr_pages, inode); + + ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache, + mapping, pages, nr_pages, + nfs_readpage_from_fscache_complete, + ctx, + mapping_gfp_mask(mapping)); + + + switch (ret) { + case 0: /* read BIO submitted (page in fscache) */ + BUG_ON(!list_empty(pages)); + BUG_ON(*nr_pages != 0); + dfprintk(FSCACHE, + "NFS: nfs_getpages_from_fscache: BIO submitted\n"); + + atomic_add(npages, &nfs_fscache_from_pages); + return ret; + + case -ENOBUFS: /* inode not in cache */ + case -ENODATA: /* page not in cache */ + dfprintk(FSCACHE, + "NFS: nfs_getpages_from_fscache: no page: %d\n", ret); + return 1; + + default: + dfprintk(FSCACHE, + "NFS: nfs_getpages_from_fscache: ret %d\n", ret); + nfs_fscache_from_error = ret; + } + + return ret; +} + +/* + * store an updated page in fscache + */ +extern void nfs_writepage_to_fscache_complete(struct page *page, void *data, int error); + +static inline void nfs_writepage_to_fscache(struct inode *inode, + struct page *page) +{ + int error; + + if (PageNfsCached(page) && NFS_I(inode)->fscache) { + dfprintk(FSCACHE, + "NFS: writepage_to_fscache (0x%p/0x%p/0x%p)\n", + NFS_I(inode)->fscache, page, inode); + + error = fscache_write_page(NFS_I(inode)->fscache, page, + nfs_writepage_to_fscache_complete, + NULL, GFP_KERNEL); + if (error != 0) { + dfprintk(FSCACHE, + "NFS: fscache_write_page error %d\n", + error); + fscache_uncache_page(NFS_I(inode)->fscache, page); + } + } +} + +#else /* CONFIG_NFS_FSCACHE */ +static inline int nfs_fscache_register(void) { return 0; } +static inline void nfs_fscache_unregister(void) {} +static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {} +static inline void nfs4_fscache_get_client_cookie(struct nfs_client *clp) {} +static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} +static inline const char *nfs_server_fscache_state(struct nfs_server *server) { return "no "; } + +static inline void nfs_fscache_get_fh_cookie(struct super_block *sb, + struct nfs_inode *nfsi, + int maycache) +{ +} +static inline void nfs_fscache_set_size(struct nfs_server *server, + struct nfs_inode *nfsi, + loff_t i_size) +{ +} +static inline void nfs_fscache_release_fh_cookie(struct nfs_server *server, + struct nfs_inode *nfsi) +{ +} +static inline void nfs_fscache_zap_fh_cookie(struct nfs_server *server, struct nfs_inode *nfsi) {} +static inline void nfs_fscache_renew_fh_cookie(struct nfs_server *server, struct nfs_inode *nfsi) {} +static inline void nfs_fscache_disable_fh_cookie(struct inode *inode) {} +static inline void nfs_fscache_install_vm_ops(struct inode *inode, struct vm_area_struct *vma) {} +static inline void nfs_fscache_release_page(struct page *page) {} +static inline void nfs_fscache_invalidate_page(struct page *page, + struct inode *inode, + unsigned long offset) +{ +} +static inline void nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) {} +static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, struct page *page) +{ + return -ENOBUFS; +} +static 
inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + return -ENOBUFS; +} + +static inline void nfs_writepage_to_fscache(struct inode *inode, struct page *page) +{ + BUG_ON(PageNfsCached(page)); +} + +#endif /* CONFIG_NFS_FSCACHE */ +#endif /* _NFS_FSCACHE_H */ diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c new file mode 100644 index 0000000..977e590 --- /dev/null +++ b/fs/nfs/getroot.c @@ -0,0 +1,306 @@ +/* getroot.c: get the root dentry for an NFS mount + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "nfs4_fs.h" +#include "delegation.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT +#define NFS_PARANOIA 1 + +/* + * get an NFS2/NFS3 root dentry from the root filehandle + */ +struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) +{ + struct nfs_server *server = NFS_SB(sb); + struct nfs_fsinfo fsinfo; + struct nfs_fattr fattr; + struct dentry *mntroot; + struct inode *inode; + int error; + + /* create a dummy root dentry with dummy inode for this superblock */ + if (!sb->s_root) { + struct nfs_fh dummyfh; + struct dentry *root; + struct inode *iroot; + + memset(&dummyfh, 0, sizeof(dummyfh)); + memset(&fattr, 0, sizeof(fattr)); + nfs_fattr_init(&fattr); + fattr.valid = NFS_ATTR_FATTR; + fattr.type = NFDIR; + fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR; + fattr.nlink = 2; + + iroot = nfs_fhget(sb, &dummyfh, &fattr); + if (IS_ERR(iroot)) + return ERR_PTR(PTR_ERR(iroot)); + + root = d_alloc_root(iroot); + if (!root) { + iput(iroot); + return ERR_PTR(-ENOMEM); + } + + sb->s_root = root; + } + + /* get the actual root for this mount */ + fsinfo.fattr = &fattr; + + error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); + if (error < 0) { + dprintk("nfs_get_root: getattr error = %d\n", -error); + return ERR_PTR(error); + } + + inode = nfs_fhget(sb, mntfh, fsinfo.fattr); + if (IS_ERR(inode)) { + dprintk("nfs_get_root: get root inode failed\n"); + return ERR_PTR(PTR_ERR(inode)); + } + + /* root dentries normally start off anonymous and get spliced in later + * if the dentry tree reaches them; however if the dentry already + * exists, we'll pick it up at this point and use it as the root + */ + mntroot = d_alloc_anon(inode); + if (!mntroot) { + iput(inode); + dprintk("nfs_get_root: get root dentry failed\n"); + return ERR_PTR(-ENOMEM); + } + + if (!mntroot->d_op) + mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops; + + return mntroot; +} + +#ifdef CONFIG_NFS_V4 + +/* + * Do a simple pathwalk from the root FH of the server to the nominated target + * of the mountpoint + * - give error on symlinks + * - give error on ".." 
occurring in the path + * - follow traversals + */ +int nfs4_path_walk(struct nfs_server *server, + struct nfs_fh *mntfh, + const char *path) +{ + struct nfs_fsinfo fsinfo; + struct nfs_fattr fattr; + struct nfs_fh lastfh; + struct qstr name; + int ret; + //int referral_count = 0; + + dprintk("--> nfs4_path_walk(,,%s)\n", path); + + fsinfo.fattr = &fattr; + nfs_fattr_init(&fattr); + + if (*path++ != '/') { + dprintk("nfs4_get_root: Path does not begin with a slash\n"); + return -EINVAL; + } + + /* Start by getting the root filehandle from the server */ + ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); + if (ret < 0) { + dprintk("nfs4_get_root: getroot error = %d\n", -ret); + return ret; + } + + if (fattr.type != NFDIR) { + printk(KERN_ERR "nfs4_get_root:" + " getroot encountered non-directory\n"); + return -ENOTDIR; + } + + if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) { + printk(KERN_ERR "nfs4_get_root:" + " getroot obtained referral\n"); + return -EREMOTE; + } + +next_component: + dprintk("Next: %s\n", path); + + /* extract the next bit of the path */ + if (!*path) + goto path_walk_complete; + + name.name = path; + while (*path && *path != '/') + path++; + name.len = path - (const char *) name.name; + +eat_dot_dir: + while (*path == '/') + path++; + + if (path[0] == '.' && (path[1] == '/' || !path[1])) { + path += 2; + goto eat_dot_dir; + } + + if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2]) + ) { + printk(KERN_ERR "nfs4_get_root:" + " Mount path contains reference to \"..\"\n"); + return -EINVAL; + } + + /* lookup the next FH in the sequence */ + memcpy(&lastfh, mntfh, sizeof(lastfh)); + + dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path); + + ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name, + mntfh, &fattr); + if (ret < 0) { + dprintk("nfs4_get_root: getroot error = %d\n", -ret); + return ret; + } + + if (fattr.type != NFDIR) { + printk(KERN_ERR "nfs4_get_root:" + " lookupfh encountered non-directory\n"); + return -ENOTDIR; + } + + if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) { + printk(KERN_ERR "nfs4_get_root:" + " lookupfh obtained referral\n"); + return -EREMOTE; + } + + goto next_component; + +path_walk_complete: + memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid)); + dprintk("<-- nfs4_path_walk() = 0\n"); + return 0; +} + +/* + * get an NFS4 root dentry from the root filehandle + */ +struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) +{ + struct nfs_server *server = NFS_SB(sb); + struct nfs_fattr fattr; + struct dentry *mntroot; + struct inode *inode; + int error; + + dprintk("--> nfs4_get_root()\n"); + + /* create a dummy root dentry with dummy inode for this superblock */ + if (!sb->s_root) { + struct nfs_fh dummyfh; + struct dentry *root; + struct inode *iroot; + + memset(&dummyfh, 0, sizeof(dummyfh)); + memset(&fattr, 0, sizeof(fattr)); + nfs_fattr_init(&fattr); + fattr.valid = NFS_ATTR_FATTR; + fattr.type = NFDIR; + fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR; + fattr.nlink = 2; + + iroot = nfs_fhget(sb, &dummyfh, &fattr); + if (IS_ERR(iroot)) + return ERR_PTR(PTR_ERR(iroot)); + + root = d_alloc_root(iroot); + if (!root) { + iput(iroot); + return ERR_PTR(-ENOMEM); + } + + sb->s_root = root; + } + + /* get the info about the server and filesystem */ + error = nfs4_server_capabilities(server, mntfh); + if (error < 0) { + dprintk("nfs_get_root: getcaps error = %d\n", + -error); + return ERR_PTR(error); + } + + /* get the actual root for this mount */ + error = 
server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); + if (error < 0) { + dprintk("nfs_get_root: getattr error = %d\n", -error); + return ERR_PTR(error); + } + + inode = nfs_fhget(sb, mntfh, &fattr); + if (IS_ERR(inode)) { + dprintk("nfs_get_root: get root inode failed\n"); + return ERR_PTR(PTR_ERR(inode)); + } + + /* root dentries normally start off anonymous and get spliced in later + * if the dentry tree reaches them; however if the dentry already + * exists, we'll pick it up at this point and use it as the root + */ + mntroot = d_alloc_anon(inode); + if (!mntroot) { + iput(inode); + dprintk("nfs_get_root: get root dentry failed\n"); + return ERR_PTR(-ENOMEM); + } + + if (!mntroot->d_op) + mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops; + + dprintk("<-- nfs4_get_root()\n"); + return mntroot; +} + +#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b81e7ed..7cd17dc 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -57,6 +57,20 @@ #define IDMAP_HASH_SZ 128 /* Default cache timeout is 10 minutes */ unsigned int nfs_idmap_cache_timeout = 600 * HZ; +static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + int jif = num * HZ; + if (endp == val || *endp || num < 0 || jif < num) + return -EINVAL; + *((int *)kp->arg) = jif; + return 0; +} + +module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, + &nfs_idmap_cache_timeout, 0644); + struct idmap_hashent { unsigned long ih_expires; __u32 ih_id; @@ -94,15 +108,16 @@ static struct rpc_pipe_ops idmap_upcall_ .destroy_msg = idmap_pipe_destroy_msg, }; -void -nfs_idmap_new(struct nfs4_client *clp) +int +nfs_idmap_new(struct nfs_client *clp) { struct idmap *idmap; + int error; + + BUG_ON(clp->cl_idmap != NULL); - if (clp->cl_idmap != NULL) - return; if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) - return; + return -ENOMEM; snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), "%s/idmap", clp->cl_rpcclient->cl_pathname); @@ -110,8 +125,9 @@ nfs_idmap_new(struct nfs4_client *clp) idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, idmap, &idmap_upcall_ops, 0); if (IS_ERR(idmap->idmap_dentry)) { + error = PTR_ERR(idmap->idmap_dentry); kfree(idmap); - return; + return error; } mutex_init(&idmap->idmap_lock); @@ -121,10 +137,11 @@ nfs_idmap_new(struct nfs4_client *clp) idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; clp->cl_idmap = idmap; + return 0; } void -nfs_idmap_delete(struct nfs4_client *clp) +nfs_idmap_delete(struct nfs_client *clp) { struct idmap *idmap = clp->cl_idmap; @@ -479,27 +496,27 @@ static unsigned int fnvhash32(const void return (hash); } -int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) +int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); } -int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) +int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); } -int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf) +int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, 
buf); } -int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf) +int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf) { struct idmap *idmap = clp->cl_idmap; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d349fb2..dcf916b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -88,6 +88,8 @@ void nfs_clear_inode(struct inode *inode cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); + + nfs_fscache_release_fh_cookie(NFS_SERVER(inode), nfsi); BUG_ON(atomic_read(&nfsi->data_updates) != 0); } @@ -134,6 +136,8 @@ void nfs_zap_caches(struct inode *inode) spin_lock(&inode->i_lock); nfs_zap_caches_locked(inode); spin_unlock(&inode->i_lock); + + nfs_fscache_zap_fh_cookie(NFS_SERVER(inode), NFS_I(inode)); } static void nfs_zap_acl_cache(struct inode *inode) @@ -212,6 +216,7 @@ nfs_fhget(struct super_block *sb, struct }; struct inode *inode = ERR_PTR(-ENOENT); unsigned long hash; + int maycache = 1; if ((fattr->valid & NFS_ATTR_FATTR) == 0) goto out_no_inode; @@ -242,13 +247,13 @@ nfs_fhget(struct super_block *sb, struct /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ - inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; + inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &nfs_file_operations; inode->i_data.a_ops = &nfs_file_aops; inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops; + inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) @@ -260,6 +265,7 @@ nfs_fhget(struct super_block *sb, struct else inode->i_op = &nfs_mountpoint_inode_operations; inode->i_fop = NULL; + maycache = 0; } } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; @@ -292,6 +298,8 @@ nfs_fhget(struct super_block *sb, struct memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); nfsi->cache_access.cred = NULL; + nfs_fscache_get_fh_cookie(sb, nfsi, maycache); + unlock_new_inode(inode); } else nfs_refresh_inode(inode, fattr); @@ -374,6 +382,7 @@ void nfs_setattr_update_inode(struct ino if ((attr->ia_valid & ATTR_SIZE) != 0) { nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); inode->i_size = attr->ia_size; + nfs_fscache_set_size(NFS_SERVER(inode), NFS_I(inode), inode->i_size); vmtruncate(inode, attr->ia_size); } } @@ -556,6 +565,8 @@ int nfs_open(struct inode *inode, struct ctx->mode = filp->f_mode; nfs_file_set_open_context(filp, ctx); put_nfs_open_context(ctx); + if ((filp->f_flags & O_ACCMODE) != O_RDONLY) + nfs_fscache_disable_fh_cookie(inode); return 0; } @@ -694,6 +705,8 @@ int nfs_revalidate_mapping(struct inode } spin_unlock(&inode->i_lock); + nfs_fscache_renew_fh_cookie(NFS_SERVER(inode), nfsi); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -927,11 +940,13 @@ static int nfs_update_inode(struct inode if (data_stable) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_DATA; + nfs_fscache_set_size(NFS_SERVER(inode), nfsi, inode->i_size); } invalid |= NFS_INO_INVALID_ATTR; } else if (new_isize > cur_isize) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + nfs_fscache_set_size(NFS_SERVER(inode), nfsi, inode->i_size); } nfsi->cache_change_attribute = jiffies; dprintk("NFS: isize change on 
server for file %s/%ld\n", @@ -1025,7 +1040,7 @@ #endif out_fileid: printk(KERN_ERR "NFS: server %s error: fileid changed\n" "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", - NFS_SERVER(inode)->hostname, inode->i_sb->s_id, + NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id, (long long)nfsi->fileid, (long long)fattr->fileid); goto out_err; } @@ -1144,6 +1159,14 @@ static int __init init_nfs_fs(void) { int err; + err = nfs_fscache_register(); + if (err < 0) + goto out6; + + err = nfs_fs_proc_init(); + if (err) + goto out5; + err = nfs_init_nfspagecache(); if (err) goto out4; @@ -1184,6 +1207,10 @@ out2: out3: nfs_destroy_nfspagecache(); out4: + nfs_fs_proc_exit(); +out5: + nfs_fscache_unregister(); +out6: return err; } @@ -1194,10 +1221,12 @@ static void __exit exit_nfs_fs(void) nfs_destroy_readpagecache(); nfs_destroy_inodecache(); nfs_destroy_nfspagecache(); + nfs_fscache_unregister(); #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif unregister_nfs_fs(); + nfs_fs_proc_exit(); } /* Not quite true; I just maintain it */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e4f4e5d..18febb4 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -4,6 +4,42 @@ #include +#define NFS_PAGE_WRITING 0 +#define NFS_PAGE_CACHED 1 + +#define PageNfsBit(bit, page) test_bit(bit, &(page)->private) + +#define SetPageNfsBit(bit, page) \ +do { \ + SetPagePrivate((page)); \ + set_bit(bit, &(page)->private); \ +} while(0) + +#define ClearPageNfsBit(bit, page) \ +do { \ + clear_bit(bit, &(page)->private); \ +} while(0) + +#define PageNfsWriting(page) PageNfsBit(NFS_PAGE_WRITING, (page)) +#define SetPageNfsWriting(page) SetPageNfsBit(NFS_PAGE_WRITING, (page)) +#define ClearPageNfsWriting(page) ClearPageNfsBit(NFS_PAGE_WRITING, (page)) + +#define PageNfsCached(page) PageNfsBit(NFS_PAGE_CACHED, (page)) +#define SetPageNfsCached(page) SetPageNfsBit(NFS_PAGE_CACHED, (page)) +#define ClearPageNfsCached(page) ClearPageNfsBit(NFS_PAGE_CACHED, (page)) + +struct nfs_string; +struct nfs_mount_data; +struct nfs4_mount_data; + +/* Maximum number of readahead requests + * FIXME: this should really be a sysctl so that users may tune it to suit + * their needs. People that do NFS over a slow network, might for + * instance want to reduce it to something closer to 1 for improved + * interactive response. 
+ */ +#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) + struct nfs_clone_mount { const struct super_block *sb; const struct dentry *dentry; @@ -15,7 +51,45 @@ struct nfs_clone_mount { rpc_authflavor_t authflavor; }; -/* namespace-nfs4.c */ +/* + * include filesystem caching stuff here + */ +#include "fscache.h" + +/* client.c */ +extern struct rpc_program nfs_program; + +extern void nfs_put_client(struct nfs_client *); +extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); +extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *, + struct nfs_fh *); +extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *, + const char *, + const struct sockaddr_in *, + const char *, + const char *, + rpc_authflavor_t, + struct nfs_fh *); +extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, + struct nfs_fh *); +extern void nfs_free_server(struct nfs_server *server); +extern struct nfs_server *nfs_clone_server(struct nfs_server *, + struct nfs_fh *, + struct nfs_fattr *); +#ifdef CONFIG_PROC_FS +extern int __init nfs_fs_proc_init(void); +extern void nfs_fs_proc_exit(void); +#else +static inline int nfs_fs_proc_init(void) +{ + return 0; +} +static inline void nfs_fs_proc_exit(void) +{ +} +#endif + +/* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry); #else @@ -46,6 +120,7 @@ #define nfs_destroy_directcache() do {} #endif /* nfs2xdr.c */ +extern int nfs_stat_to_errno(int); extern struct rpc_procinfo nfs_procedures[]; extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); @@ -54,8 +129,9 @@ extern struct rpc_procinfo nfs3_procedur extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); /* nfs4xdr.c */ -extern int nfs_stat_to_errno(int); +#ifdef CONFIG_NFS_V4 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); +#endif /* nfs4proc.c */ #ifdef CONFIG_NFS_V4 @@ -76,10 +152,10 @@ extern void nfs4_clear_inode(struct inod #endif /* super.c */ -extern struct file_system_type nfs_referral_nfs4_fs_type; -extern struct file_system_type clone_nfs_fs_type; +extern struct file_system_type nfs_xdev_fs_type; #ifdef CONFIG_NFS_V4 -extern struct file_system_type clone_nfs4_fs_type; +extern struct file_system_type nfs4_xdev_fs_type; +extern struct file_system_type nfs4_referral_fs_type; #endif extern struct rpc_stat nfs_rpcstat; @@ -88,30 +164,33 @@ extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); /* namespace.c */ -extern char *nfs_path(const char *base, const struct dentry *dentry, +extern char *nfs_path(const char *base, + const struct dentry *droot, + const struct dentry *dentry, char *buffer, ssize_t buflen); -/* - * Determine the mount path as a string - */ -static inline char * -nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen) -{ +/* getroot.c */ +extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *); #ifdef CONFIG_NFS_V4 - return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen); -#else - return NULL; +extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *); + +extern int nfs4_path_walk(struct nfs_server *server, + struct nfs_fh *mntfh, + const char *path); #endif -} + +/* read.c */ +extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, struct page *); /* * Determine the device name as a string */ static inline char *nfs_devname(const struct vfsmount *mnt_parent, - const struct 
dentry *dentry, - char *buffer, ssize_t buflen) + const struct dentry *dentry, + char *buffer, ssize_t buflen) { - return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen); + return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root, + dentry, buffer, buflen); } /* @@ -167,20 +246,3 @@ void nfs_super_set_maxbytes(struct super if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) sb->s_maxbytes = MAX_LFS_FILESIZE; } - -/* - * Check if the string represents a "valid" IPv4 address - */ -static inline int valid_ipaddr4(const char *buf) -{ - int rc, count, in[4]; - - rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); - if (rc != 4) - return -EINVAL; - for (count = 0; count < 4; count++) { - if (in[count] > 255) - return -EINVAL; - } - return 0; -} diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 19b98ca..271a95d 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -2,6 +2,7 @@ * linux/fs/nfs/namespace.c * * Copyright (C) 2005 Trond Myklebust + * - Modified by David Howells * * NFS namespace */ @@ -28,6 +29,7 @@ int nfs_mountpoint_expiry_timeout = 500 /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - arbitrary string to prepend to the path + * @droot - pointer to root dentry for mountpoint * @dentry - pointer to dentry * @buffer - result buffer * @buflen - length of buffer @@ -38,7 +40,9 @@ int nfs_mountpoint_expiry_timeout = 500 * This is mainly for use in figuring out the path on the * server side when automounting on top of an existing partition. */ -char *nfs_path(const char *base, const struct dentry *dentry, +char *nfs_path(const char *base, + const struct dentry *droot, + const struct dentry *dentry, char *buffer, ssize_t buflen) { char *end = buffer+buflen; @@ -47,7 +51,7 @@ char *nfs_path(const char *base, const s *--end = '\0'; buflen--; spin_lock(&dcache_lock); - while (!IS_ROOT(dentry)) { + while (!IS_ROOT(dentry) && dentry != droot) { namelen = dentry->d_name.len; buflen -= namelen + 1; if (buflen < 0) @@ -94,15 +98,17 @@ static void * nfs_follow_mountpoint(stru struct nfs_fattr fattr; int err; + dprintk("--> nfs_follow_mountpoint()\n"); + BUG_ON(IS_ROOT(dentry)); dprintk("%s: enter\n", __FUNCTION__); dput(nd->dentry); nd->dentry = dget(dentry); - if (d_mountpoint(nd->dentry)) - goto out_follow; + /* Look it up again */ parent = dget_parent(nd->dentry); - err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr); + err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, + &fh, &fattr); dput(parent); if (err != 0) goto out_err; @@ -130,6 +136,8 @@ static void * nfs_follow_mountpoint(stru schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); out: dprintk("%s: done, returned %d\n", __FUNCTION__, err); + + dprintk("<-- nfs_follow_mountpoint() = %d\n", err); return ERR_PTR(err); out_err: path_release(nd); @@ -170,22 +178,23 @@ void nfs_release_automount_timer(void) /* * Clone a mountpoint of the appropriate type */ -static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname, +static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, + const char *devname, struct nfs_clone_mount *mountdata) { #ifdef CONFIG_NFS_V4 struct vfsmount *mnt = NULL; - switch (server->rpc_ops->version) { + switch (server->nfs_client->rpc_ops->version) { case 2: case 3: - mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); + mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); break; case 4: - mnt = 
vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata); + mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata); } return mnt; #else - return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); + return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); #endif } @@ -211,6 +220,8 @@ struct vfsmount *nfs_do_submount(const s char *page = (char *) __get_free_page(GFP_USER); char *devname; + dprintk("--> nfs_do_submount()\n"); + dprintk("%s: submounting on %s/%s\n", __FUNCTION__, dentry->d_parent->d_name.name, dentry->d_name.name); @@ -225,5 +236,7 @@ free_page: free_page((unsigned long)page); out: dprintk("%s: done\n", __FUNCTION__); + + dprintk("<-- nfs_do_submount() = %p\n", mnt); return mnt; } diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7143b1f..b29543d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -81,7 +81,7 @@ do_proc_get_root(struct rpc_clnt *client } /* - * Bare-bones access to getattr: this is for nfs_read_super. + * Bare-bones access to getattr: this is for nfs_get_root/nfs_get_sb */ static int nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, @@ -90,8 +90,8 @@ nfs3_proc_get_root(struct nfs_server *se int status; status = do_proc_get_root(server->client, fhandle, info); - if (status && server->client_sys != server->client) - status = do_proc_get_root(server->client_sys, fhandle, info); + if (status && server->nfs_client->cl_rpcclient != server->client) + status = do_proc_get_root(server->nfs_client->cl_rpcclient, fhandle, info); return status; } @@ -785,7 +785,7 @@ nfs3_proc_fsinfo(struct nfs_server *serv dprintk("NFS call fsinfo\n"); nfs_fattr_init(info->fattr); - status = rpc_call_sync(server->client_sys, &msg, 0); + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); dprintk("NFS reply fsinfo: %d\n", status); return status; } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9a10286..61095fe 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -43,55 +43,6 @@ enum nfs4_client_state { }; /* - * The nfs4_client identifies our client state to the server. - */ -struct nfs4_client { - struct list_head cl_servers; /* Global list of servers */ - struct in_addr cl_addr; /* Server identifier */ - u64 cl_clientid; /* constant */ - nfs4_verifier cl_confirm; - unsigned long cl_state; - - u32 cl_lockowner_id; - - /* - * The following rwsem ensures exclusive access to the server - * while we recover the state following a lease expiration. - */ - struct rw_semaphore cl_sem; - - struct list_head cl_delegations; - struct list_head cl_state_owners; - struct list_head cl_unused; - int cl_nunused; - spinlock_t cl_lock; - atomic_t cl_count; - - struct rpc_clnt * cl_rpcclient; - - struct list_head cl_superblocks; /* List of nfs_server structs */ - - unsigned long cl_lease_time; - unsigned long cl_last_renewal; - struct work_struct cl_renewd; - struct work_struct cl_recoverd; - - struct rpc_wait_queue cl_rpcwaitq; - - /* used for the setclientid verifier */ - struct timespec cl_boot_time; - - /* idmapper */ - struct idmap * cl_idmap; - - /* Our own IP address, as a null-terminated string. - * This is used to generate the clientid, and the callback address. - */ - char cl_ipaddr[16]; - unsigned char cl_id_uniquifier; -}; - -/* * struct rpc_sequence ensures that RPC calls are sent in the exact * order that they appear on the list. 
*/ @@ -127,7 +78,7 @@ static inline void nfs_confirm_seqid(str struct nfs4_state_owner { spinlock_t so_lock; struct list_head so_list; /* per-clientid list of state_owners */ - struct nfs4_client *so_client; + struct nfs_client *so_client; u32 so_id; /* 32-bit identifier, unique */ atomic_t so_count; @@ -210,10 +161,10 @@ extern ssize_t nfs4_listxattr(struct den /* nfs4proc.c */ extern int nfs4_map_errors(int err); -extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short, struct rpc_cred *); -extern int nfs4_proc_setclientid_confirm(struct nfs4_client *, struct rpc_cred *); -extern int nfs4_proc_async_renew(struct nfs4_client *, struct rpc_cred *); -extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *); +extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *); +extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); +extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); +extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); @@ -231,19 +182,14 @@ extern const u32 nfs4_fsinfo_bitmap[2]; extern const u32 nfs4_fs_locations_bitmap[2]; /* nfs4renewd.c */ -extern void nfs4_schedule_state_renewal(struct nfs4_client *); +extern void nfs4_schedule_state_renewal(struct nfs_client *); extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); -extern void nfs4_kill_renewd(struct nfs4_client *); +extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(void *); /* nfs4state.c */ -extern void init_nfsv4_state(struct nfs_server *); -extern void destroy_nfsv4_state(struct nfs_server *); -extern struct nfs4_client *nfs4_get_client(struct in_addr *); -extern void nfs4_put_client(struct nfs4_client *clp); -extern struct nfs4_client *nfs4_find_client(struct in_addr *); -struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp); -extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); +struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); +extern u32 nfs4_alloc_lockowner_id(struct nfs_client *); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); @@ -252,7 +198,7 @@ extern struct nfs4_state * nfs4_get_open extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, mode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); -extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern void nfs4_schedule_state_recovery(struct nfs_client *); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); @@ -276,10 +222,6 @@ extern struct svc_version nfs4_callback_ #else -#define init_nfsv4_state(server) do { } while (0) -#define destroy_nfsv4_state(server) do { } while (0) -#define nfs4_put_state_owner(inode, owner) do { } while (0) -#define nfs4_put_open_state(state) do { } while (0) #define nfs4_close_state(a, b) do { } while (0) #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index ea38d27..24e47f3 100644 --- 
a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -2,6 +2,7 @@ * linux/fs/nfs/nfs4namespace.c * * Copyright (C) 2005 Trond Myklebust + * - Modified by David Howells * * NFSv4 namespace */ @@ -23,7 +24,7 @@ #define NFSDBG_FACILITY NFSDBG_VFS /* * Check if fs_root is valid */ -static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname, +static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname, char *buffer, ssize_t buflen) { char *end = buffer + buflen; @@ -34,7 +35,7 @@ static inline char *nfs4_pathname_string n = pathname->ncomponents; while (--n >= 0) { - struct nfs4_string *component = &pathname->components[n]; + const struct nfs4_string *component = &pathname->components[n]; buflen -= component->len + 1; if (buflen < 0) goto Elong; @@ -47,6 +48,68 @@ Elong: return ERR_PTR(-ENAMETOOLONG); } +/* + * Determine the mount path as a string + */ +static char *nfs4_path(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + const char *srvpath; + + srvpath = strchr(mnt_parent->mnt_devname, ':'); + if (srvpath) + srvpath++; + else + srvpath = mnt_parent->mnt_devname; + + return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen); +} + +/* + * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we + * believe to be the server path to this dentry + */ +static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + const struct nfs4_fs_locations *locations, + char *page, char *page2) +{ + const char *path, *fs_path; + + path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE); + if (IS_ERR(path)) + return PTR_ERR(path); + + fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); + if (IS_ERR(fs_path)) + return PTR_ERR(fs_path); + + if (strncmp(path, fs_path, strlen(fs_path)) != 0) { + dprintk("%s: path %s does not begin with fsroot %s\n", + __FUNCTION__, path, fs_path); + return -ENOENT; + } + + return 0; +} + +/* + * Check if the string represents a "valid" IPv4 address + */ +static inline int valid_ipaddr4(const char *buf) +{ + int rc, count, in[4]; + + rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); + if (rc != 4) + return -EINVAL; + for (count = 0; count < 4; count++) { + if (in[count] > 255) + return -EINVAL; + } + return 0; +} /** * nfs_follow_referral - set up mountpoint when hitting a referral on moved error @@ -60,7 +123,7 @@ Elong: */ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, const struct dentry *dentry, - struct nfs4_fs_locations *locations) + const struct nfs4_fs_locations *locations) { struct vfsmount *mnt = ERR_PTR(-ENOENT); struct nfs_clone_mount mountdata = { @@ -68,10 +131,9 @@ static struct vfsmount *nfs_follow_refer .dentry = dentry, .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, }; - char *page, *page2; - char *path, *fs_path; + char *page = NULL, *page2 = NULL; char *devname; - int loc, s; + int loc, s, error; if (locations == NULL || locations->nlocations <= 0) goto out; @@ -79,36 +141,30 @@ static struct vfsmount *nfs_follow_refer dprintk("%s: referral at %s/%s\n", __FUNCTION__, dentry->d_parent->d_name.name, dentry->d_name.name); - /* Ensure fs path is a prefix of current dentry path */ page = (char *) __get_free_page(GFP_USER); - if (page == NULL) + if (!page) goto out; + page2 = (char *) __get_free_page(GFP_USER); - if (page2 == NULL) + if (!page2) goto out; - path = nfs4_path(dentry, page, PAGE_SIZE); - if (IS_ERR(path)) - goto 
out_free; - - fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); - if (IS_ERR(fs_path)) - goto out_free; - - if (strncmp(path, fs_path, strlen(fs_path)) != 0) { - dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path); - goto out_free; + /* Ensure fs path is a prefix of current dentry path */ + error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2); + if (error < 0) { + mnt = ERR_PTR(error); + goto out; } devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); if (IS_ERR(devname)) { mnt = (struct vfsmount *)devname; - goto out_free; + goto out; } loc = 0; while (loc < locations->nlocations && IS_ERR(mnt)) { - struct nfs4_fs_location *location = &locations->locations[loc]; + const struct nfs4_fs_location *location = &locations->locations[loc]; char *mnt_path; if (location == NULL || location->nservers <= 0 || @@ -140,7 +196,7 @@ static struct vfsmount *nfs_follow_refer addr.sin_port = htons(NFS_PORT); mountdata.addr = &addr; - mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata); + mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, devname, &mountdata); if (!IS_ERR(mnt)) { break; } @@ -149,10 +205,9 @@ static struct vfsmount *nfs_follow_refer loc++; } -out_free: - free_page((unsigned long)page); - free_page((unsigned long)page2); out: + free_page((unsigned long) page); + free_page((unsigned long) page2); dprintk("%s: done\n", __FUNCTION__); return mnt; } @@ -165,7 +220,7 @@ out: */ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) { - struct vfsmount *mnt = ERR_PTR(-ENOENT); + struct vfsmount *mnt = ERR_PTR(-ENOMEM); struct dentry *parent; struct nfs4_fs_locations *fs_locations = NULL; struct page *page; @@ -183,11 +238,16 @@ struct vfsmount *nfs_do_refmount(const s goto out_free; /* Get locations */ + mnt = ERR_PTR(-ENOENT); + parent = dget_parent(dentry); - dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name); + dprintk("%s: getting locations for %s/%s\n", + __FUNCTION__, parent->d_name.name, dentry->d_name.name); + err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page); dput(parent); - if (err != 0 || fs_locations->nlocations <= 0 || + if (err != 0 || + fs_locations->nlocations <= 0 || fs_locations->fs_path.ncomponents <= 0) goto out_free; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e6ee97f..041146f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -64,7 +64,7 @@ static int nfs4_do_fsinfo(struct nfs_ser static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); -static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp); +static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); /* Prevent leaks of NFSv4 errors into userland */ int nfs4_map_errors(int err) @@ -195,7 +195,7 @@ static void nfs4_setup_readdir(u64 cooki static void renew_lease(const struct nfs_server *server, unsigned long timestamp) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; spin_lock(&clp->cl_lock); if (time_before(clp->cl_last_renewal,timestamp)) clp->cl_last_renewal = timestamp; @@ -252,7 +252,7 @@ static struct nfs4_opendata *nfs4_openda atomic_inc(&sp->so_count); p->o_arg.fh = NFS_FH(dir); 
p->o_arg.open_flags = flags, - p->o_arg.clientid = server->nfs4_state->cl_clientid; + p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id = sp->so_id; p->o_arg.name = &dentry->d_name; p->o_arg.server = server; @@ -550,7 +550,7 @@ int nfs4_open_delegation_recall(struct d case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: /* Don't recall a delegation if it was lost */ - nfs4_schedule_state_recovery(server->nfs4_state); + nfs4_schedule_state_recovery(server->nfs_client); return err; } err = nfs4_handle_exception(server, err, &exception); @@ -758,7 +758,7 @@ static int _nfs4_proc_open(struct nfs4_o } nfs_confirm_seqid(&data->owner->so_seqid, 0); if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) - return server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); + return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); return 0; } @@ -792,7 +792,7 @@ out: int nfs4_recover_expired_lease(struct nfs_server *server) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) nfs4_schedule_state_recovery(clp); @@ -867,7 +867,7 @@ static int _nfs4_open_delegated(struct i { struct nfs_delegation *delegation; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state_owner *sp = NULL; struct nfs4_state *state = NULL; @@ -953,7 +953,7 @@ static int _nfs4_do_open(struct inode *d struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs4_opendata *opendata; int status; @@ -1133,7 +1133,7 @@ static void nfs4_close_done(struct rpc_t break; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(server->nfs4_state); + nfs4_schedule_state_recovery(server->nfs_client); break; default: if (nfs4_async_handle_error(task, server) == -EAGAIN) { @@ -1268,7 +1268,7 @@ nfs4_atomic_open(struct inode *dir, stru BUG_ON(nd->intent.open.flags & O_CREAT); } - cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) return (struct dentry *)cred; state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); @@ -1291,7 +1291,7 @@ nfs4_open_revalidate(struct inode *dir, struct rpc_cred *cred; struct nfs4_state *state; - cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) return PTR_ERR(cred); state = nfs4_open_delegated(dentry->d_inode, openflags, cred); @@ -1393,70 +1393,19 @@ static int nfs4_lookup_root(struct nfs_s return err; } +/* + * get the file handle for the "/" directory on the server + */ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fsinfo *info) { - struct nfs_fattr * fattr = info->fattr; - unsigned char * p; - struct qstr q; - struct nfs4_lookup_arg args = { - .dir_fh = fhandle, - .name = &q, - .bitmask = nfs4_fattr_bitmap, - }; - struct nfs4_lookup_res res = { - .server = server, - .fattr = fattr, - .fh = fhandle, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], - .rpc_argp = &args, - .rpc_resp = &res, - }; int status; - /* - * Now we do a separate LOOKUP for each component 
of the mount path. - * The LOOKUPs are done separately so that we can conveniently - * catch an ERR_WRONGSEC if it occurs along the way... - */ status = nfs4_lookup_root(server, fhandle, info); - if (status) - goto out; - - p = server->mnt_path; - for (;;) { - struct nfs4_exception exception = { }; - - while (*p == '/') - p++; - if (!*p) - break; - q.name = p; - while (*p && (*p != '/')) - p++; - q.len = p - q.name; - - do { - nfs_fattr_init(fattr); - status = nfs4_handle_exception(server, - rpc_call_sync(server->client, &msg, 0), - &exception); - } while (exception.retry); - if (status == 0) - continue; - if (status == -ENOENT) { - printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path); - printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n"); - } - break; - } if (status == 0) status = nfs4_server_capabilities(server, fhandle); if (status == 0) status = nfs4_do_fsinfo(server, fhandle, info); -out: return nfs4_map_errors(status); } @@ -1565,7 +1514,7 @@ nfs4_proc_setattr(struct dentry *dentry, nfs_fattr_init(fattr); - cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); if (IS_ERR(cred)) return PTR_ERR(cred); @@ -1583,6 +1532,52 @@ nfs4_proc_setattr(struct dentry *dentry, return status; } +static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, + struct qstr *name, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + int status; + struct nfs4_lookup_arg args = { + .bitmask = server->attr_bitmask, + .dir_fh = dirfh, + .name = name, + }; + struct nfs4_lookup_res res = { + .server = server, + .fattr = fattr, + .fh = fhandle, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + nfs_fattr_init(fattr); + + dprintk("NFS call lookupfh %s\n", name->name); + status = rpc_call_sync(server->client, &msg, 0); + dprintk("NFS reply lookupfh: %d\n", status); + if (status == -NFS4ERR_MOVED) + status = -EREMOTE; + return status; +} + +static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, + struct qstr *name, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_proc_lookupfh(server, dirfh, name, + fhandle, fattr), + &exception); + } while (exception.retry); + return err; +} + static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { @@ -1881,7 +1876,7 @@ nfs4_proc_create(struct inode *dir, stru struct rpc_cred *cred; int status = 0; - cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) { status = PTR_ERR(cred); goto out; @@ -2521,7 +2516,7 @@ static void nfs4_proc_commit_setup(struc */ static void nfs4_renew_done(struct rpc_task *task, void *data) { - struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; + struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp; unsigned long timestamp = (unsigned long)data; if (task->tk_status < 0) { @@ -2543,7 +2538,7 @@ static const struct rpc_call_ops nfs4_re .rpc_call_done = nfs4_renew_done, }; -int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred) +int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -2555,7 +2550,7 @@ int 
nfs4_proc_async_renew(struct nfs4_cl &nfs4_renew_ops, (void *)jiffies); } -int nfs4_proc_renew(struct nfs4_client *clp, struct rpc_cred *cred) +int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -2757,7 +2752,7 @@ static int nfs4_proc_set_acl(struct inod return -EOPNOTSUPP; nfs_inode_return_delegation(inode); buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); - ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (ret == 0) nfs4_write_cached_acl(inode, buf, buflen); return ret; @@ -2766,7 +2761,7 @@ static int nfs4_proc_set_acl(struct inod static int nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; if (!clp || task->tk_status >= 0) return 0; @@ -2803,7 +2798,7 @@ static int nfs4_wait_bit_interruptible(v return 0; } -static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) +static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp) { sigset_t oldset; int res; @@ -2846,7 +2841,7 @@ static int nfs4_delay(struct rpc_clnt *c */ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; int ret = errorcode; exception->retry = 0; @@ -2873,7 +2868,7 @@ int nfs4_handle_exception(const struct n return nfs4_map_errors(ret); } -int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) +int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) { nfs4_verifier sc_verifier; struct nfs4_setclientid setclientid = { @@ -2897,7 +2892,7 @@ int nfs4_proc_setclientid(struct nfs4_cl for(;;) { setclientid.sc_name_len = scnprintf(setclientid.sc_name, sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u", - clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr), + clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr), cred->cr_ops->cr_name, clp->cl_id_uniquifier); setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, @@ -2920,7 +2915,7 @@ int nfs4_proc_setclientid(struct nfs4_cl return status; } -static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) +static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) { struct nfs_fsinfo fsinfo; struct rpc_message msg = { @@ -2944,7 +2939,7 @@ static int _nfs4_proc_setclientid_confir return status; } -int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) +int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) { long timeout; int err; @@ -3052,7 +3047,7 @@ int nfs4_proc_delegreturn(struct inode * switch (err) { case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(server->nfs4_state); + nfs4_schedule_state_recovery(server->nfs_client); case 0: return 0; } @@ -3081,7 +3076,7 @@ static int _nfs4_proc_getlk(struct nfs4_ { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs_lockt_args arg = { .fh = NFS_FH(inode), .fl = request, @@ -3206,7 +3201,7 @@ static void nfs4_locku_done(struct rpc_t break; case -NFS4ERR_STALE_STATEID: case 
-NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(calldata->server->nfs4_state); + nfs4_schedule_state_recovery(calldata->server->nfs_client); break; default: if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) { @@ -3318,7 +3313,7 @@ static struct nfs4_lockdata *nfs4_alloc_ if (p->arg.lock_seqid == NULL) goto out_free; p->arg.lock_stateid = &lsp->ls_stateid; - p->arg.lock_owner.clientid = server->nfs4_state->cl_clientid; + p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; p->arg.lock_owner.id = lsp->ls_id; p->lsp = lsp; atomic_inc(&lsp->ls_count); @@ -3488,7 +3483,7 @@ static int nfs4_lock_expired(struct nfs4 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs4_client *clp = state->owner->so_client; + struct nfs_client *clp = state->owner->so_client; unsigned char fl_flags = request->fl_flags; int status; @@ -3698,6 +3693,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, + .lookupfh = nfs4_proc_lookupfh, .lookup = nfs4_proc_lookup, .access = nfs4_proc_access, .readlink = nfs4_proc_readlink, @@ -3718,6 +3714,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .statfs = nfs4_proc_statfs, .fsinfo = nfs4_proc_fsinfo, .pathconf = nfs4_proc_pathconf, + .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, .read_done = nfs4_read_done, diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 5d764d8..f2c8936 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -61,7 +61,7 @@ #define NFSDBG_FACILITY NFSDBG_PROC void nfs4_renew_state(void *data) { - struct nfs4_client *clp = (struct nfs4_client *)data; + struct nfs_client *clp = (struct nfs_client *)data; struct rpc_cred *cred; long lease, timeout; unsigned long last, now; @@ -108,7 +108,7 @@ out: /* Must be called with clp->cl_sem locked for writes */ void -nfs4_schedule_state_renewal(struct nfs4_client *clp) +nfs4_schedule_state_renewal(struct nfs_client *clp) { long timeout; @@ -127,26 +127,13 @@ nfs4_schedule_state_renewal(struct nfs4_ void nfs4_renewd_prepare_shutdown(struct nfs_server *server) { - struct nfs4_client *clp = server->nfs4_state; - - if (!clp) - return; flush_scheduled_work(); - down_write(&clp->cl_sem); - if (!list_empty(&server->nfs4_siblings)) - list_del_init(&server->nfs4_siblings); - up_write(&clp->cl_sem); } -/* Must be called with clp->cl_sem locked for writes */ void -nfs4_kill_renewd(struct nfs4_client *clp) +nfs4_kill_renewd(struct nfs_client *clp) { down_read(&clp->cl_sem); - if (!list_empty(&clp->cl_superblocks)) { - up_read(&clp->cl_sem); - return; - } cancel_delayed_work(&clp->cl_renewd); up_read(&clp->cl_sem); flush_scheduled_work(); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 090a36b..5fffbdf 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -50,149 +50,15 @@ #include #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" +#include "internal.h" #define OPENOWNER_POOL_SIZE 8 const nfs4_stateid zero_stateid; -static DEFINE_SPINLOCK(state_spinlock); static LIST_HEAD(nfs4_clientid_list); -void -init_nfsv4_state(struct nfs_server *server) -{ - server->nfs4_state = NULL; - INIT_LIST_HEAD(&server->nfs4_siblings); -} - -void -destroy_nfsv4_state(struct nfs_server *server) -{ - kfree(server->mnt_path); - server->mnt_path = NULL; - if (server->nfs4_state) { - nfs4_put_client(server->nfs4_state); - server->nfs4_state = NULL; - } -} - -/* - * nfs4_get_client(): 
returns an empty client structure - * nfs4_put_client(): drops reference to client structure - * - * Since these are allocated/deallocated very rarely, we don't - * bother putting them in a slab cache... - */ -static struct nfs4_client * -nfs4_alloc_client(struct in_addr *addr) -{ - struct nfs4_client *clp; - - if (nfs_callback_up() < 0) - return NULL; - if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) { - nfs_callback_down(); - return NULL; - } - memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); - init_rwsem(&clp->cl_sem); - INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_state_owners); - INIT_LIST_HEAD(&clp->cl_unused); - spin_lock_init(&clp->cl_lock); - atomic_set(&clp->cl_count, 1); - INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); - INIT_LIST_HEAD(&clp->cl_superblocks); - rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); - clp->cl_rpcclient = ERR_PTR(-EINVAL); - clp->cl_boot_time = CURRENT_TIME; - clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; - return clp; -} - -static void -nfs4_free_client(struct nfs4_client *clp) -{ - struct nfs4_state_owner *sp; - - while (!list_empty(&clp->cl_unused)) { - sp = list_entry(clp->cl_unused.next, - struct nfs4_state_owner, - so_list); - list_del(&sp->so_list); - kfree(sp); - } - BUG_ON(!list_empty(&clp->cl_state_owners)); - nfs_idmap_delete(clp); - if (!IS_ERR(clp->cl_rpcclient)) - rpc_shutdown_client(clp->cl_rpcclient); - kfree(clp); - nfs_callback_down(); -} - -static struct nfs4_client *__nfs4_find_client(struct in_addr *addr) -{ - struct nfs4_client *clp; - list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) { - if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) { - atomic_inc(&clp->cl_count); - return clp; - } - } - return NULL; -} - -struct nfs4_client *nfs4_find_client(struct in_addr *addr) -{ - struct nfs4_client *clp; - spin_lock(&state_spinlock); - clp = __nfs4_find_client(addr); - spin_unlock(&state_spinlock); - return clp; -} - -struct nfs4_client * -nfs4_get_client(struct in_addr *addr) -{ - struct nfs4_client *clp, *new = NULL; - - spin_lock(&state_spinlock); - for (;;) { - clp = __nfs4_find_client(addr); - if (clp != NULL) - break; - clp = new; - if (clp != NULL) { - list_add(&clp->cl_servers, &nfs4_clientid_list); - new = NULL; - break; - } - spin_unlock(&state_spinlock); - new = nfs4_alloc_client(addr); - spin_lock(&state_spinlock); - if (new == NULL) - break; - } - spin_unlock(&state_spinlock); - if (new) - nfs4_free_client(new); - return clp; -} - -void -nfs4_put_client(struct nfs4_client *clp) -{ - if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock)) - return; - list_del(&clp->cl_servers); - spin_unlock(&state_spinlock); - BUG_ON(!list_empty(&clp->cl_superblocks)); - rpc_wake_up(&clp->cl_rpcwaitq); - nfs4_kill_renewd(clp); - nfs4_free_client(clp); -} - -static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred) +static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) { int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport, cred); @@ -204,13 +70,13 @@ static int nfs4_init_client(struct nfs4_ } u32 -nfs4_alloc_lockowner_id(struct nfs4_client *clp) +nfs4_alloc_lockowner_id(struct nfs_client *clp) { return clp->cl_lockowner_id ++; } static struct nfs4_state_owner * -nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred) +nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred) { struct nfs4_state_owner *sp = NULL; @@ -224,7 +90,7 @@ nfs4_client_grab_unused(struct nfs4_clie return sp; } -struct 
rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp) +struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; struct rpc_cred *cred = NULL; @@ -238,7 +104,7 @@ struct rpc_cred *nfs4_get_renew_cred(str return cred; } -struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp) +struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; @@ -251,7 +117,7 @@ struct rpc_cred *nfs4_get_setclientid_cr } static struct nfs4_state_owner * -nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred) +nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred) { struct nfs4_state_owner *sp, *res = NULL; @@ -294,7 +160,7 @@ nfs4_alloc_state_owner(void) void nfs4_drop_state_owner(struct nfs4_state_owner *sp) { - struct nfs4_client *clp = sp->so_client; + struct nfs_client *clp = sp->so_client; spin_lock(&clp->cl_lock); list_del_init(&sp->so_list); spin_unlock(&clp->cl_lock); @@ -306,7 +172,7 @@ nfs4_drop_state_owner(struct nfs4_state_ */ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp, *new; get_rpccred(cred); @@ -337,7 +203,7 @@ struct nfs4_state_owner *nfs4_get_state_ */ void nfs4_put_state_owner(struct nfs4_state_owner *sp) { - struct nfs4_client *clp = sp->so_client; + struct nfs_client *clp = sp->so_client; struct rpc_cred *cred = sp->so_cred; if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) @@ -540,7 +406,7 @@ __nfs4_find_lock_state(struct nfs4_state static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *lsp; - struct nfs4_client *clp = state->owner->so_client; + struct nfs_client *clp = state->owner->so_client; lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); if (lsp == NULL) @@ -752,7 +618,7 @@ out: static int reclaimer(void *); -static inline void nfs4_clear_recover_bit(struct nfs4_client *clp) +static inline void nfs4_clear_recover_bit(struct nfs_client *clp) { smp_mb__before_clear_bit(); clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state); @@ -764,25 +630,25 @@ static inline void nfs4_clear_recover_bi /* * State recovery routine */ -static void nfs4_recover_state(struct nfs4_client *clp) +static void nfs4_recover_state(struct nfs_client *clp) { struct task_struct *task; __module_get(THIS_MODULE); atomic_inc(&clp->cl_count); task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim", - NIPQUAD(clp->cl_addr)); + NIPQUAD(clp->cl_addr.sin_addr)); if (!IS_ERR(task)) return; nfs4_clear_recover_bit(clp); - nfs4_put_client(clp); + nfs_put_client(clp); module_put(THIS_MODULE); } /* * Schedule a state recovery attempt */ -void nfs4_schedule_state_recovery(struct nfs4_client *clp) +void nfs4_schedule_state_recovery(struct nfs_client *clp) { if (!clp) return; @@ -879,7 +745,7 @@ out_err: return status; } -static void nfs4_state_mark_reclaim(struct nfs4_client *clp) +static void nfs4_state_mark_reclaim(struct nfs_client *clp) { struct nfs4_state_owner *sp; struct nfs4_state *state; @@ -903,7 +769,7 @@ static void nfs4_state_mark_reclaim(stru static int reclaimer(void *ptr) { - struct nfs4_client *clp = ptr; + struct nfs_client *clp = ptr; struct nfs4_state_owner *sp; struct nfs4_state_recovery_ops *ops; struct rpc_cred *cred; @@ -970,12 +836,12 @@ out: if (status == -NFS4ERR_CB_PATH_DOWN) nfs_handle_cb_pathdown(clp); nfs4_clear_recover_bit(clp); - nfs4_put_client(clp); 
+ nfs_put_client(clp); module_put_and_exit(0); return 0; out_error: printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", - NIPQUAD(clp->cl_addr.s_addr), -status); + NIPQUAD(clp->cl_addr.sin_addr), -status); set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); goto out; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1750d99..992a713 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -58,7 +58,7 @@ #define NFSDBG_FACILITY NFSDBG_XDR /* Mapping from NFS error code to "errno" error code. */ #define errno_NFSERR_IO EIO -static int nfs_stat_to_errno(int); +static int nfs4_stat_to_errno(int); /* NFSv4 COMPOUND tags are only wanted for debugging purposes */ #ifdef DEBUG @@ -529,7 +529,7 @@ static int encode_attrs(struct xdr_strea if (iap->ia_valid & ATTR_MODE) len += 4; if (iap->ia_valid & ATTR_UID) { - owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name); + owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); if (owner_namelen < 0) { printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n", iap->ia_uid); @@ -541,7 +541,7 @@ static int encode_attrs(struct xdr_strea len += 4 + (XDR_QUADLEN(owner_namelen) << 2); } if (iap->ia_valid & ATTR_GID) { - owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group); + owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); if (owner_grouplen < 0) { printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n", iap->ia_gid); @@ -1160,7 +1160,7 @@ static int encode_rename(struct xdr_stre return 0; } -static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid) +static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid) { uint32_t *p; @@ -1246,7 +1246,7 @@ static int encode_setclientid(struct xdr return 0; } -static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state) +static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state) { uint32_t *p; @@ -1945,7 +1945,7 @@ static int nfs4_xdr_enc_server_caps(stru /* * a RENEW request */ -static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) +static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp) { struct xdr_stream xdr; struct compound_hdr hdr = { @@ -1975,7 +1975,7 @@ static int nfs4_xdr_enc_setclientid(stru /* * a SETCLIENTID_CONFIRM request */ -static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) +static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp) { struct xdr_stream xdr; struct compound_hdr hdr = { @@ -2127,12 +2127,12 @@ static int decode_op_hdr(struct xdr_stre } READ32(nfserr); if (nfserr != NFS_OK) - return -nfs_stat_to_errno(nfserr); + return -nfs4_stat_to_errno(nfserr); return 0; } /* Dummy routine */ -static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp) +static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp) { uint32_t *p; unsigned int strlen; @@ -2636,7 +2636,7 @@ static int decode_attr_nlink(struct xdr_ return 0; } -static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid) +static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *uid) { uint32_t len, *p; @@ -2660,7 +2660,7 @@ 
static int decode_attr_owner(struct xdr_ return 0; } -static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid) +static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid) { uint32_t len, *p; @@ -3051,9 +3051,9 @@ static int decode_getfattr(struct xdr_st fattr->mode |= fmode; if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0) goto xdr_error; - if ((status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid)) != 0) + if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0) goto xdr_error; - if ((status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid)) != 0) + if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0) goto xdr_error; if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0) goto xdr_error; @@ -3254,7 +3254,7 @@ static int decode_delegation(struct xdr_ if (decode_space_limit(xdr, &res->maxsize) < 0) return -EIO; } - return decode_ace(xdr, NULL, res->server->nfs4_state); + return decode_ace(xdr, NULL, res->server->nfs_client); } static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) @@ -3564,7 +3564,7 @@ static int decode_setattr(struct xdr_str return 0; } -static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp) +static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) { uint32_t *p; uint32_t opnum; @@ -3597,7 +3597,7 @@ static int decode_setclientid(struct xdr READ_BUF(len); return -NFSERR_CLID_INUSE; } else - return -nfs_stat_to_errno(nfserr); + return -nfs4_stat_to_errno(nfserr); return 0; } @@ -4255,7 +4255,7 @@ static int nfs4_xdr_dec_fsinfo(struct rp if (!status) status = decode_fsinfo(&xdr, fsinfo); if (!status) - status = -nfs_stat_to_errno(hdr.status); + status = -nfs4_stat_to_errno(hdr.status); return status; } @@ -4334,7 +4334,7 @@ static int nfs4_xdr_dec_renew(struct rpc * a SETCLIENTID request */ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p, - struct nfs4_client *clp) + struct nfs_client *clp) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4345,7 +4345,7 @@ static int nfs4_xdr_dec_setclientid(stru if (!status) status = decode_setclientid(&xdr, clp); if (!status) - status = -nfs_stat_to_errno(hdr.status); + status = -nfs4_stat_to_errno(hdr.status); return status; } @@ -4367,7 +4367,7 @@ static int nfs4_xdr_dec_setclientid_conf if (!status) status = decode_fsinfo(&xdr, fsinfo); if (!status) - status = -nfs_stat_to_errno(hdr.status); + status = -nfs4_stat_to_errno(hdr.status); return status; } @@ -4520,7 +4520,7 @@ static struct { * This one is used jointly by NFSv2 and NFSv3. 
*/ static int -nfs_stat_to_errno(int stat) +nfs4_stat_to_errno(int stat) { int i; for (i = 0; nfs_errtbl[i].stat != -1; i++) { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 36e902a..c45f724 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -17,6 +17,7 @@ #include #include #include #include +#include "internal.h" #define NFS_PARANOIA 1 @@ -84,7 +85,7 @@ nfs_create_request(struct nfs_open_conte atomic_set(&req->wb_complete, 0); req->wb_index = page->index; page_cache_get(page); - BUG_ON(PagePrivate(page)); + BUG_ON(PageNfsWriting(page)); BUG_ON(!PageLocked(page)); BUG_ON(page->mapping->host != inode); req->wb_offset = offset; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index b3899ea..42a0558 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -66,14 +66,14 @@ nfs_proc_get_root(struct nfs_server *ser dprintk("%s: call getattr\n", __FUNCTION__); nfs_fattr_init(fattr); - status = rpc_call_sync(server->client_sys, &msg, 0); + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); if (status) return status; dprintk("%s: call statfs\n", __FUNCTION__); msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; msg.rpc_resp = &fsinfo; - status = rpc_call_sync(server->client_sys, &msg, 0); + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); dprintk("%s: reply statfs: %d\n", __FUNCTION__, status); if (status) return status; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 52bf634..13ff66a 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -26,11 +26,13 @@ #include #include #include #include +#include #include #include #include "iostat.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -162,7 +164,7 @@ static int nfs_readpage_sync(struct nfs_ rdata->args.offset = page_offset(page) + rdata->args.pgbase; dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n", - NFS_SERVER(inode)->hostname, + NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id, (long long)NFS_FILEID(inode), (unsigned long long)rdata->args.pgbase, @@ -200,13 +202,18 @@ static int nfs_readpage_sync(struct nfs_ SetPageUptodate(page); result = 0; + nfs_readpage_to_fscache(inode, page, 1); + unlock_page(page); + + return result; + io_error: unlock_page(page); nfs_readdata_free(rdata); return result; } -static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, +int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct page *page) { LIST_HEAD(one_request); @@ -231,6 +238,11 @@ static int nfs_readpage_async(struct nfs static void nfs_readpage_release(struct nfs_page *req) { + struct inode *d_inode = req->wb_context->dentry->d_inode; + + if (PageUptodate(req->wb_page)) + nfs_readpage_to_fscache(d_inode, req->wb_page, 0); + unlock_page(req->wb_page); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", @@ -613,6 +625,10 @@ int nfs_readpage(struct file *file, stru ctx = get_nfs_open_context((struct nfs_open_context *) file->private_data); if (!IS_SYNC(inode)) { + error = nfs_readpage_from_fscache(ctx, inode, page); + if (error == 0) + goto out; + error = nfs_readpage_async(ctx, inode, page); goto out; } @@ -643,6 +659,7 @@ readpage_async_filler(void *data, struct unsigned int len; nfs_wb_page(inode, page); + len = nfs_page_length(inode, page); if (len == 0) return nfs_return_empty_page(page); @@ -682,6 +699,17 @@ int nfs_readpages(struct file *filp, str } else desc.ctx = get_nfs_open_context((struct nfs_open_context *) filp->private_data); + + /* attempt to read as many of the pages as possible 
from the cache + * - this returns -ENOBUFS immediately if the cookie is negative + */ + ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping, + pages, &nr_pages); + if (ret == 0) { + put_nfs_open_context(desc.ctx); + return ret; /* all read */ + } + ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); if (!list_empty(&head)) { int err = nfs_pagein_list(&head, server->rpages); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e8a9bee..db4e072 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -13,6 +13,11 @@ * * Split from inode.c by David Howells * + * - superblocks are indexed on server only - all inodes, dentries, etc. associated with a + * particular server are held in the same superblock + * - NFS superblocks can have several effective roots to the dentry tree + * - directory type roots are spliced into the tree when a path from one root reaches the root + * of another (see nfs_lookup()) */ #include @@ -52,66 +57,12 @@ #include "internal.h" #define NFSDBG_FACILITY NFSDBG_VFS -/* Maximum number of readahead requests - * FIXME: this should really be a sysctl so that users may tune it to suit - * their needs. People that do NFS over a slow network, might for - * instance want to reduce it to something closer to 1 for improved - * interactive response. - */ -#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) - -/* - * RPC cruft for NFS - */ -static struct rpc_version * nfs_version[] = { - NULL, - NULL, - &nfs_version2, -#if defined(CONFIG_NFS_V3) - &nfs_version3, -#elif defined(CONFIG_NFS_V4) - NULL, -#endif -#if defined(CONFIG_NFS_V4) - &nfs_version4, -#endif -}; - -static struct rpc_program nfs_program = { - .name = "nfs", - .number = NFS_PROGRAM, - .nrvers = ARRAY_SIZE(nfs_version), - .version = nfs_version, - .stats = &nfs_rpcstat, - .pipe_dir_name = "/nfs", -}; - -struct rpc_stat nfs_rpcstat = { - .program = &nfs_program -}; - - -#ifdef CONFIG_NFS_V3_ACL -static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; -static struct rpc_version * nfsacl_version[] = { - [3] = &nfsacl_version3, -}; - -struct rpc_program nfsacl_program = { - .name = "nfsacl", - .number = NFS_ACL_PROGRAM, - .nrvers = ARRAY_SIZE(nfsacl_version), - .version = nfsacl_version, - .stats = &nfsacl_rpcstat, -}; -#endif /* CONFIG_NFS_V3_ACL */ - static void nfs_umount_begin(struct vfsmount *, int); static int nfs_statfs(struct dentry *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); static int nfs_show_stats(struct seq_file *, struct vfsmount *); static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); -static int nfs_clone_nfs_sb(struct file_system_type *fs_type, +static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs_kill_super(struct super_block *); @@ -123,10 +74,10 @@ static struct file_system_type nfs_fs_ty .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type clone_nfs_fs_type = { +struct file_system_type nfs_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs", - .get_sb = nfs_clone_nfs_sb, + .get_sb = nfs_xdev_get_sb, .kill_sb = nfs_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -145,10 +96,10 @@ static struct super_operations nfs_sops #ifdef CONFIG_NFS_V4 static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); -static int nfs_clone_nfs4_sb(struct file_system_type *fs_type, - int 
flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); -static int nfs_referral_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_xdev_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs4_kill_super(struct super_block *sb); static struct file_system_type nfs4_fs_type = { @@ -159,18 +110,18 @@ static struct file_system_type nfs4_fs_t .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type clone_nfs4_fs_type = { +struct file_system_type nfs4_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs4", - .get_sb = nfs_clone_nfs4_sb, + .get_sb = nfs4_xdev_get_sb, .kill_sb = nfs4_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type nfs_referral_nfs4_fs_type = { +struct file_system_type nfs4_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", - .get_sb = nfs_referral_nfs4_sb, + .get_sb = nfs4_referral_get_sb, .kill_sb = nfs4_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -187,40 +138,6 @@ static struct super_operations nfs4_sops }; #endif -#ifdef CONFIG_NFS_V4 -static const int nfs_set_port_min = 0; -static const int nfs_set_port_max = 65535; - -static int param_set_port(const char *val, struct kernel_param *kp) -{ - char *endp; - int num = simple_strtol(val, &endp, 0); - if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) - return -EINVAL; - *((int *)kp->arg) = num; - return 0; -} - -module_param_call(callback_tcpport, param_set_port, param_get_int, - &nfs_callback_set_tcpport, 0644); -#endif - -#ifdef CONFIG_NFS_V4 -static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) -{ - char *endp; - int num = simple_strtol(val, &endp, 0); - int jif = num * HZ; - if (endp == val || *endp || num < 0 || jif < num) - return -EINVAL; - *((int *)kp->arg) = jif; - return 0; -} - -module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, - &nfs_idmap_cache_timeout, 0644); -#endif - /* * Register the NFS filesystems */ @@ -269,11 +186,10 @@ #endif */ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct super_block *sb = dentry->d_sb; - struct nfs_server *server = NFS_SB(sb); + struct nfs_server *server = NFS_SB(dentry->d_sb); unsigned char blockbits; unsigned long blockres; - struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode); + struct nfs_fh *fh = NFS_FH(dentry->d_inode); struct nfs_fattr fattr; struct nfs_fsstat res = { .fattr = &fattr, @@ -282,7 +198,7 @@ static int nfs_statfs(struct dentry *den lock_kernel(); - error = server->rpc_ops->statfs(server, rootfh, &res); + error = server->nfs_client->rpc_ops->statfs(server, fh, &res); buf->f_type = NFS_SUPER_MAGIC; if (error < 0) goto out_err; @@ -292,7 +208,7 @@ static int nfs_statfs(struct dentry *den * case where f_frsize != f_bsize. Eventually we want to * report the value of wtmult in this field. */ - buf->f_frsize = sb->s_blocksize; + buf->f_frsize = dentry->d_sb->s_blocksize; /* * On most *nix systems, f_blocks, f_bfree, and f_bavail @@ -301,8 +217,8 @@ static int nfs_statfs(struct dentry *den * thus historically Linux's sys_statfs reports these * fields in units of f_bsize. 
*/ - buf->f_bsize = sb->s_blocksize; - blockbits = sb->s_blocksize_bits; + buf->f_bsize = dentry->d_sb->s_blocksize; + blockbits = dentry->d_sb->s_blocksize_bits; blockres = (1 << blockbits) - 1; buf->f_blocks = (res.tbytes + blockres) >> blockbits; buf->f_bfree = (res.fbytes + blockres) >> blockbits; @@ -323,9 +239,12 @@ static int nfs_statfs(struct dentry *den } +/* + * Map the security flavour number to a name + */ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) { - static struct { + static const struct { rpc_authflavor_t flavour; const char *str; } sec_flavours[] = { @@ -356,7 +275,7 @@ static const char *nfs_pseudoflavour_to_ */ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { - static struct proc_nfs_info { + static const struct proc_nfs_info { int flag; char *str; char *nostr; @@ -367,13 +286,15 @@ static void nfs_show_mount_options(struc { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, + { NFS_MOUNT_FSCACHE, ",fsc", "" }, { 0, NULL, NULL } }; - struct proc_nfs_info *nfs_infop; + const struct proc_nfs_info *nfs_infop; + struct nfs_client *clp = nfss->nfs_client; char buf[12]; char *proto; - seq_printf(m, ",vers=%d", nfss->rpc_ops->version); + seq_printf(m, ",vers=%d", clp->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); seq_printf(m, ",wsize=%d", nfss->wsize); if (nfss->acregmin != 3*HZ || showdefaults) @@ -402,8 +323,8 @@ static void nfs_show_mount_options(struc proto = buf; } seq_printf(m, ",proto=%s", proto); - seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); - seq_printf(m, ",retrans=%u", nfss->retrans_count); + seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ); + seq_printf(m, ",retrans=%u", clp->retrans_count); seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); } @@ -417,7 +338,7 @@ static int nfs_show_options(struct seq_f nfs_show_mount_options(m, nfss, 0); seq_puts(m, ",addr="); - seq_escape(m, nfss->hostname, " \t\n\\"); + seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\"); return 0; } @@ -454,7 +375,7 @@ static int nfs_show_stats(struct seq_fil seq_printf(m, ",namelen=%d", nfss->namelen); #ifdef CONFIG_NFS_V4 - if (nfss->rpc_ops->version == 4) { + if (nfss->nfs_client->rpc_ops->version == 4) { seq_printf(m, "\n\tnfsv4:\t"); seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); @@ -501,782 +422,351 @@ #endif /* * Begin unmount by attempting to remove all automounted mountpoints we added - * in response to traversals + * in response to xdev traversals and referrals */ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) { - struct nfs_server *server; - struct rpc_clnt *rpc; - shrink_submounts(vfsmnt, &nfs_automount_list); - if (!(flags & MNT_FORCE)) - return; - /* -EIO all pending I/O */ - server = NFS_SB(vfsmnt->mnt_sb); - rpc = server->client; - if (!IS_ERR(rpc)) - rpc_killall_tasks(rpc); - rpc = server->client_acl; - if (!IS_ERR(rpc)) - rpc_killall_tasks(rpc); } /* - * Obtain the root inode of the file system. 
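One user-visible effect of the new NFS_MOUNT_FSCACHE entry in the mount-option table above is that persistently cached mounts can be spotted in /proc/mounts. Purely as an illustration — the server name, export path and option values below are invented, and the exact ordering is whatever nfs_show_mount_options() emits — such a mount shows up along these lines:

	warthog:/export /mnt/nfs nfs rw,vers=3,rsize=32768,wsize=32768,hard,intr,fsc,proto=tcp,addr=warthog 0 0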
+ * Validate the NFS2/NFS3 mount data + * - fills in the mount root filehandle */ -static struct inode * -nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) +static int nfs_validate_mount_data(struct nfs_mount_data *data, + struct nfs_fh *mntfh) { - struct nfs_server *server = NFS_SB(sb); - int error; - - error = server->rpc_ops->getroot(server, rootfh, fsinfo); - if (error < 0) { - dprintk("nfs_get_root: getattr error = %d\n", -error); - return ERR_PTR(error); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + return -EINVAL; } - server->fsid = fsinfo->fattr->fsid; - return nfs_fhget(sb, rootfh, fsinfo->fattr); -} - -/* - * Do NFS version-independent mount processing, and sanity checking - */ -static int -nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) -{ - struct nfs_server *server; - struct inode *root_inode; - struct nfs_fattr fattr; - struct nfs_fsinfo fsinfo = { - .fattr = &fattr, - }; - struct nfs_pathconf pathinfo = { - .fattr = &fattr, - }; - int no_root_error = 0; - unsigned long max_rpc_payload; - - /* We probably want something more informative here */ - snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - - server = NFS_SB(sb); - - sb->s_magic = NFS_SUPER_MAGIC; - - server->io_stats = nfs_alloc_iostats(); - if (server->io_stats == NULL) - return -ENOMEM; + if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); + return -EINVAL; + } - root_inode = nfs_get_root(sb, &server->fh, &fsinfo); - /* Did getting the root inode fail? */ - if (IS_ERR(root_inode)) { - no_root_error = PTR_ERR(root_inode); - goto out_no_root; + switch (data->version) { + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: mount structure version %d does not support NFSv3\n", + __FUNCTION__, + data->version); + return -EINVAL; + } + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) { + dprintk("%s: mount structure version %d does not support strong security\n", + __FUNCTION__, + data->version); + return -EINVAL; + } + /* Fill in pseudoflavor for mount version < 5 */ + data->pseudoflavor = RPC_AUTH_UNIX; + case 5: + memset(data->context, 0, sizeof(data->context)); } - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) { - no_root_error = -ENOMEM; - goto out_no_root; + +#ifndef CONFIG_NFS_V3 + /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); + return -EPROTONOSUPPORT; } - sb->s_root->d_op = server->rpc_ops->dentry_ops; - - /* mount time stamp, in seconds */ - server->mount_time = jiffies; - - /* Get some general file system info */ - if (server->namelen == 0 && - server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) - server->namelen = pathinfo.max_namelen; - /* Work out a lot of parameters */ - if (server->rsize == 0) - server->rsize = nfs_block_size(fsinfo.rtpref, NULL); - if (server->wsize == 0) - server->wsize = nfs_block_size(fsinfo.wtpref, NULL); - - if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax) - server->rsize = nfs_block_size(fsinfo.rtmax, NULL); - if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax) - server->wsize = nfs_block_size(fsinfo.wtmax, NULL); - - max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); - 
if (server->rsize > max_rpc_payload) - server->rsize = max_rpc_payload; - if (server->rsize > NFS_MAX_FILE_IO_SIZE) - server->rsize = NFS_MAX_FILE_IO_SIZE; - server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - if (server->wsize > max_rpc_payload) - server->wsize = max_rpc_payload; - if (server->wsize > NFS_MAX_FILE_IO_SIZE) - server->wsize = NFS_MAX_FILE_IO_SIZE; - server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; +#endif /* CONFIG_NFS_V3 */ - if (sb->s_blocksize == 0) - sb->s_blocksize = nfs_block_bits(server->wsize, - &sb->s_blocksize_bits); - server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL); - - server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); - if (server->dtsize > PAGE_CACHE_SIZE) - server->dtsize = PAGE_CACHE_SIZE; - if (server->dtsize > server->rsize) - server->dtsize = server->rsize; - - if (server->flags & NFS_MOUNT_NOAC) { - server->acregmin = server->acregmax = 0; - server->acdirmin = server->acdirmax = 0; - sb->s_flags |= MS_SYNCHRONOUS; + /* We now require that the mount process passes the remote address */ + if (data->addr.sin_addr.s_addr == INADDR_ANY) { + dprintk("%s: mount program didn't pass remote address!\n", + __FUNCTION__); + return -EINVAL; } - server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; - nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); + /* Prepare the root filehandle */ + if (data->flags & NFS_MOUNT_VER3) + mntfh->size = data->root.size; + else + mntfh->size = NFS2_FHSIZE; - server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; - server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; + if (mntfh->size > sizeof(mntfh->data)) { + dprintk("%s: invalid root filehandle\n", __FUNCTION__); + return -EINVAL; + } + + memcpy(mntfh->data, data->root.data, mntfh->size); + if (mntfh->size < sizeof(mntfh->data)) + memset(mntfh->data + mntfh->size, 0, + sizeof(mntfh->data) - mntfh->size); - /* We're airborne Set socket buffersize */ - rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); return 0; - /* Yargs. It didn't work out. */ -out_no_root: - dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error); - if (!IS_ERR(root_inode)) - iput(root_inode); - return no_root_error; } /* - * Initialise the timeout values for a connection + * Initialise the common bits of the superblock */ -static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) +static inline void nfs_initialise_sb(struct super_block *sb) { - to->to_initval = timeo * HZ / 10; - to->to_retries = retrans; - if (!to->to_retries) - to->to_retries = 2; - - switch (proto) { - case IPPROTO_TCP: - if (!to->to_initval) - to->to_initval = 60 * HZ; - if (to->to_initval > NFS_MAX_TCP_TIMEOUT) - to->to_initval = NFS_MAX_TCP_TIMEOUT; - to->to_increment = to->to_initval; - to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); - to->to_exponential = 0; - break; - case IPPROTO_UDP: - default: - if (!to->to_initval) - to->to_initval = 11 * HZ / 10; - if (to->to_initval > NFS_MAX_UDP_TIMEOUT) - to->to_initval = NFS_MAX_UDP_TIMEOUT; - to->to_maxval = NFS_MAX_UDP_TIMEOUT; - to->to_exponential = 1; - break; - } -} + struct nfs_server *server = NFS_SB(sb); -/* - * Create an RPC client handle. 
- */ -static struct rpc_clnt * -nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) -{ - struct rpc_timeout timeparms; - struct rpc_xprt *xprt = NULL; - struct rpc_clnt *clnt = NULL; - int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; - - nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); - - server->retrans_timeo = timeparms.to_initval; - server->retrans_count = timeparms.to_retries; - - /* create transport and client */ - xprt = xprt_create_proto(proto, &server->addr, &timeparms); - if (IS_ERR(xprt)) { - dprintk("%s: cannot create RPC transport. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - return (struct rpc_clnt *)xprt; - } - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, data->pseudoflavor); - if (IS_ERR(clnt)) { - dprintk("%s: cannot create RPC client. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - goto out_fail; - } + sb->s_magic = NFS_SUPER_MAGIC; - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; + /* We probably want something more informative here */ + snprintf(sb->s_id, sizeof(sb->s_id), + "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - return clnt; + if (sb->s_blocksize == 0) + sb->s_blocksize = nfs_block_bits(server->wsize, + &sb->s_blocksize_bits); + + if (server->flags & NFS_MOUNT_NOAC) + sb->s_flags |= MS_SYNCHRONOUS; -out_fail: - return clnt; + nfs_super_set_maxbytes(sb, server->maxfilesize); } /* - * Clone a server record + * Finish setting up an NFS2/3 superblock */ -static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data) +static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data) { struct nfs_server *server = NFS_SB(sb); - struct nfs_server *parent = NFS_SB(data->sb); - struct inode *root_inode; - struct nfs_fsinfo fsinfo; - void *err = ERR_PTR(-ENOMEM); - - sb->s_op = data->sb->s_op; - sb->s_blocksize = data->sb->s_blocksize; - sb->s_blocksize_bits = data->sb->s_blocksize_bits; - sb->s_maxbytes = data->sb->s_maxbytes; - - server->client_sys = server->client_acl = ERR_PTR(-EINVAL); - server->io_stats = nfs_alloc_iostats(); - if (server->io_stats == NULL) - goto out; - - server->client = rpc_clone_client(parent->client); - if (IS_ERR((err = server->client))) - goto out; - - if (!IS_ERR(parent->client_sys)) { - server->client_sys = rpc_clone_client(parent->client_sys); - if (IS_ERR((err = server->client_sys))) - goto out; - } - if (!IS_ERR(parent->client_acl)) { - server->client_acl = rpc_clone_client(parent->client_acl); - if (IS_ERR((err = server->client_acl))) - goto out; - } - root_inode = nfs_fhget(sb, data->fh, data->fattr); - if (!root_inode) - goto out; - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) - goto out_put_root; - fsinfo.fattr = data->fattr; - if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) - nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); - sb->s_root->d_op = server->rpc_ops->dentry_ops; - sb->s_flags |= MS_ACTIVE; - return server; -out_put_root: - iput(root_inode); -out: - return err; -} -/* - * Copy an existing superblock and attach revised data - */ -static int nfs_clone_generic_sb(struct nfs_clone_mount *data, - struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *), - struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *), - struct vfsmount *mnt) -{ - struct nfs_server *server; - struct nfs_server *parent = NFS_SB(data->sb); - struct super_block *sb = ERR_PTR(-EINVAL); - char *hostname; - int 
error = -ENOMEM; - int len; - - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (server == NULL) - goto out_err; - memcpy(server, parent, sizeof(*server)); - hostname = (data->hostname != NULL) ? data->hostname : parent->hostname; - len = strlen(hostname) + 1; - server->hostname = kmalloc(len, GFP_KERNEL); - if (server->hostname == NULL) - goto free_server; - memcpy(server->hostname, hostname, len); - error = rpciod_up(); - if (error != 0) - goto free_hostname; - - sb = fill_sb(server, data); - if (IS_ERR(sb)) { - error = PTR_ERR(sb); - goto kill_rpciod; - } - - if (sb->s_root) - goto out_rpciod_down; + sb->s_blocksize_bits = 0; + sb->s_blocksize = 0; + if (data->bsize) + sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - server = fill_server(sb, data); - if (IS_ERR(server)) { - error = PTR_ERR(server); - goto out_deactivate; + if (server->flags & NFS_MOUNT_VER3) { + /* The VFS shouldn't apply the umask to mode bits. We will do + * so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; + sb->s_time_gran = 1; } - return simple_set_mnt(mnt, sb); -out_deactivate: - up_write(&sb->s_umount); - deactivate_super(sb); - return error; -out_rpciod_down: - rpciod_down(); - kfree(server->hostname); - kfree(server); - return simple_set_mnt(mnt, sb); -kill_rpciod: - rpciod_down(); -free_hostname: - kfree(server->hostname); -free_server: - kfree(server); -out_err: - return error; + + sb->s_op = &nfs_sops; + nfs_initialise_sb(sb); } /* - * Set up an NFS2/3 superblock - * - * The way this works is that the mount process passes a structure - * in the data argument which contains the server's IP address - * and the root file handle obtained from the server's mount - * daemon. We stash these away in the private superblock fields. + * Finish setting up a cloned NFS2/3 superblock */ -static int -nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) +static void nfs_clone_super(struct super_block *sb, + const struct super_block *old_sb) { - struct nfs_server *server; - rpc_authflavor_t authflavor; + struct nfs_server *server = NFS_SB(sb); - server = NFS_SB(sb); - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - if (data->bsize) - sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; - - /* Start lockd here, before we might error out */ - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_up(); - - server->namelen = data->namlen; - server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL); - if (!server->hostname) - return -ENOMEM; - strcpy(server->hostname, data->hostname); - - /* Check NFS protocol revision and initialize RPC op vector - * and file handle pool. 
*/ -#ifdef CONFIG_NFS_V3 - if (server->flags & NFS_MOUNT_VER3) { - server->rpc_ops = &nfs_v3_clientops; - server->caps |= NFS_CAP_READDIRPLUS; - } else { - server->rpc_ops = &nfs_v2_clientops; - } -#else - server->rpc_ops = &nfs_v2_clientops; -#endif + sb->s_blocksize_bits = old_sb->s_blocksize_bits; + sb->s_blocksize = old_sb->s_blocksize; + sb->s_maxbytes = old_sb->s_maxbytes; - /* Fill in pseudoflavor for mount version < 5 */ - if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) - data->pseudoflavor = RPC_AUTH_UNIX; - authflavor = data->pseudoflavor; /* save for sb_init() */ - /* XXX maybe we want to add a server->pseudoflavor field */ - - /* Create RPC client handles */ - server->client = nfs_create_client(server, data); - if (IS_ERR(server->client)) - return PTR_ERR(server->client); - /* RFC 2623, sec 2.3.2 */ - if (authflavor != RPC_AUTH_UNIX) { - struct rpc_auth *auth; - - server->client_sys = rpc_clone_client(server->client); - if (IS_ERR(server->client_sys)) - return PTR_ERR(server->client_sys); - auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); - if (IS_ERR(auth)) - return PTR_ERR(auth); - } else { - atomic_inc(&server->client->cl_count); - server->client_sys = server->client; - } if (server->flags & NFS_MOUNT_VER3) { -#ifdef CONFIG_NFS_V3_ACL - if (!(server->flags & NFS_MOUNT_NOACL)) { - server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); - /* No errors! Assume that Sun nfsacls are supported */ - if (!IS_ERR(server->client_acl)) - server->caps |= NFS_CAP_ACLS; - } -#else - server->flags &= ~NFS_MOUNT_NOACL; -#endif /* CONFIG_NFS_V3_ACL */ - /* - * The VFS shouldn't apply the umask to mode bits. We will - * do so ourselves when necessary. + /* The VFS shouldn't apply the umask to mode bits. We will do + * so ourselves when necessary. 
*/ sb->s_flags |= MS_POSIXACL; - if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) - server->namelen = NFS3_MAXNAMLEN; sb->s_time_gran = 1; - } else { - if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) - server->namelen = NFS2_MAXNAMLEN; } - sb->s_op = &nfs_sops; - return nfs_sb_init(sb, authflavor); + sb->s_op = old_sb->s_op; + nfs_initialise_sb(sb); } -static int nfs_set_super(struct super_block *s, void *data) +static int nfs_set_super(struct super_block *s, void *_server) { - s->s_fs_info = data; - return set_anon_super(s, data); + struct nfs_server *server = _server; + int ret; + + s->s_fs_info = server; + ret = set_anon_super(s, server); + if (ret == 0) + server->s_dev = s->s_dev; + return ret; } static int nfs_compare_super(struct super_block *sb, void *data) { - struct nfs_server *server = data; - struct nfs_server *old = NFS_SB(sb); + struct nfs_server *server = data, *old = NFS_SB(sb); - if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr) + if (old->nfs_client != server->nfs_client) return 0; - if (old->addr.sin_port != server->addr.sin_port) + if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) return 0; - return !nfs_compare_fh(&old->fh, &server->fh); + return 1; } static int nfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - int error; struct nfs_server *server = NULL; struct super_block *s; - struct nfs_fh *root; + struct nfs_fh mntfh; struct nfs_mount_data *data = raw_data; + struct dentry *mntroot; + int error; - error = -EINVAL; - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - goto out_err_noserver; - } - if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - goto out_err_noserver; - } - switch (data->version) { - case 1: - data->namlen = 0; - case 2: - data->bsize = 0; - case 3: - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: mount structure version %d does not support NFSv3\n", - __FUNCTION__, - data->version); - goto out_err_noserver; - } - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - case 4: - if (data->flags & NFS_MOUNT_SECFLAVOUR) { - dprintk("%s: mount structure version %d does not support strong security\n", - __FUNCTION__, - data->version); - goto out_err_noserver; - } - case 5: - memset(data->context, 0, sizeof(data->context)); - } -#ifndef CONFIG_NFS_V3 - /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ - error = -EPROTONOSUPPORT; - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); - goto out_err_noserver; - } -#endif /* CONFIG_NFS_V3 */ + /* Validate the mount data */ + error = nfs_validate_mount_data(data, &mntfh); + if (error < 0) + return error; - error = -ENOMEM; - server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (!server) + /* Get a volume representation */ + server = nfs_create_server(data, &mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); goto out_err_noserver; - /* Zero out the NFS state stuff */ - init_nfsv4_state(server); - server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); - - root = &server->fh; - if (data->flags & NFS_MOUNT_VER3) - root->size = data->root.size; - else - root->size = NFS2_FHSIZE; - error = -EINVAL; - if (root->size > sizeof(root->data)) { - dprintk("%s: invalid root filehandle\n", __FUNCTION__); - goto out_err; - } - memcpy(root->data, data->root.data, root->size); - 
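The rewritten nfs_get_sb() here, and the xdev and NFSv4 variants that follow it, all share one outline; what differs is only how the nfs_server record is obtained and which fill/clone helper initialises a brand-new superblock. Condensed, with the error paths and the server = NULL reset omitted (they are exactly as in the hunks themselves):

	server = nfs_create_server(data, &mntfh);	/* volume representation (nfs_clone_server()
							 * or nfs4_create_server() in the variants) */
	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
	if (s->s_fs_info != server)
		nfs_free_server(server);		/* sget() matched an existing sb: same nfs_client,
							 * same FSID, so the volume is shared */
	if (!s->s_root) {
		s->s_flags = flags;			/* first mount of this volume: set the sb up */
		nfs_fill_super(s, data);
	}
	mntroot = nfs_get_root(s, &mntfh);		/* splice a root for this particular export in */
	s->s_flags |= MS_ACTIVE;
	mnt->mnt_sb = s;
	mnt->mnt_root = mntroot;
	return 0;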
- /* We now require that the mount process passes the remote address */ - memcpy(&server->addr, &data->addr, sizeof(server->addr)); - if (server->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote address!\n", - __FUNCTION__); - goto out_err; - } - - /* Fire up rpciod if not yet running */ - error = rpciod_up(); - if (error < 0) { - dprintk("%s: couldn't start rpciod! Error = %d\n", - __FUNCTION__, error); - goto out_err; } + /* Get a superblock - note that we may end up sharing one that already exists */ s = sget(fs_type, nfs_compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); - goto out_err_rpciod; + goto out_err_nosb; } - if (s->s_root) - goto out_rpciod_down; + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } - s->s_flags = flags; + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs_fill_super(s, data); + } - error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); - if (error) { - up_write(&s->s_umount); - deactivate_super(s); - return error; + mntroot = nfs_get_root(s, &mntfh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; } - s->s_flags |= MS_ACTIVE; - return simple_set_mnt(mnt, s); -out_rpciod_down: - rpciod_down(); - kfree(server); - return simple_set_mnt(mnt, s); + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + return 0; -out_err_rpciod: - rpciod_down(); -out_err: - kfree(server); +out_err_nosb: + nfs_free_server(server); out_err_noserver: return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + return error; } +/* + * Destroy an NFS2/3 superblock + */ static void nfs_kill_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); kill_anon_super(s); - - if (!IS_ERR(server->client)) - rpc_shutdown_client(server->client); - if (!IS_ERR(server->client_sys)) - rpc_shutdown_client(server->client_sys); - if (!IS_ERR(server->client_acl)) - rpc_shutdown_client(server->client_acl); - - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_down(); /* release rpc.lockd */ - - rpciod_down(); /* release rpciod */ - - nfs_free_iostats(server->io_stats); - kfree(server->hostname); - kfree(server); - nfs_release_automount_timer(); + nfs_free_server(server); } -static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - struct super_block *sb; - - server->fsid = data->fattr->fsid; - nfs_copy_fh(&server->fh, data->fh); - sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); - if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM)) - lockd_up(); - return sb; -} - -static int nfs_clone_nfs_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +/* + * Clone an NFS2/3 server record on xdev traversal (FSID-change) + */ +static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) { struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server, mnt); -} + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + int error; -#ifdef CONFIG_NFS_V4 -static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, - struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor) -{ - struct nfs4_client *clp; - struct rpc_xprt *xprt = NULL; - struct rpc_clnt *clnt = NULL; - int err = -EIO; - - clp = 
nfs4_get_client(&server->addr.sin_addr); - if (!clp) { - dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); - return ERR_PTR(err); - } + dprintk("--> nfs_xdev_get_sb()\n"); - /* Now create transport and client */ - down_write(&clp->cl_sem); - if (IS_ERR(clp->cl_rpcclient)) { - xprt = xprt_create_proto(proto, &server->addr, timeparms); - if (IS_ERR(xprt)) { - up_write(&clp->cl_sem); - err = PTR_ERR(xprt); - dprintk("%s: cannot create RPC transport. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } - /* Bind to a reserved port! */ - xprt->resvport = 1; - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, flavor); - if (IS_ERR(clnt)) { - up_write(&clp->cl_sem); - err = PTR_ERR(clnt); - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; - clp->cl_rpcclient = clnt; - memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); - nfs_idmap_new(clp); - } - list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); - clnt = rpc_clone_client(clp->cl_rpcclient); - if (!IS_ERR(clnt)) - server->nfs4_state = clp; - up_write(&clp->cl_sem); - clp = NULL; - - if (IS_ERR(clnt)) { - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - return clnt; + /* create a new volume representation */ + server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; } - if (server->nfs4_state->cl_idmap == NULL) { - dprintk("%s: failed to create idmapper.\n", __FUNCTION__); - return ERR_PTR(-ENOMEM); + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; } - if (clnt->cl_auth->au_flavor != flavor) { - struct rpc_auth *auth; - - auth = rpcauth_create(flavor, clnt); - if (IS_ERR(auth)) { - dprintk("%s: couldn't create credcache!\n", __FUNCTION__); - return (struct rpc_clnt *)auth; - } + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; } - return clnt; - out_fail: - if (clp) - nfs4_put_client(clp); - return ERR_PTR(err); -} - -/* - * Set up an NFS4 superblock - */ -static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) -{ - struct nfs_server *server; - struct rpc_timeout timeparms; - rpc_authflavor_t authflavour; - int err = -EIO; - - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - server = NFS_SB(sb); - if (data->rsize != 0) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize != 0) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - server->caps = NFS_CAP_ATOMIC_OPEN; + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs_clone_super(s, data->sb); + } - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; + mntroot = nfs_get_root(s, data->fh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } - server->rpc_ops = &nfs_v4_clientops; + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; - nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); + dprintk("<-- nfs_xdev_get_sb() = 0\n"); + return 0; - server->retrans_timeo = timeparms.to_initval; - server->retrans_count 
= timeparms.to_retries; +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error); + return error; - /* Now create transport and client */ - authflavour = RPC_AUTH_UNIX; - if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen != 1) { - dprintk("%s: Invalid number of RPC auth flavours %d.\n", - __FUNCTION__, data->auth_flavourlen); - err = -EINVAL; - goto out_fail; - } - if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { - err = -EFAULT; - goto out_fail; - } - } +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error); + return error; +} - server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour); - if (IS_ERR(server->client)) { - err = PTR_ERR(server->client); - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } +#ifdef CONFIG_NFS_V4 +/* + * Finish setting up a cloned NFS4 superblock + */ +static void nfs4_clone_super(struct super_block *sb, + const struct super_block *old_sb) +{ + sb->s_blocksize_bits = old_sb->s_blocksize_bits; + sb->s_blocksize = old_sb->s_blocksize; + sb->s_maxbytes = old_sb->s_maxbytes; sb->s_time_gran = 1; - - sb->s_op = &nfs4_sops; - err = nfs_sb_init(sb, authflavour); - - out_fail: - return err; + sb->s_op = old_sb->s_op; + nfs_initialise_sb(sb); } -static int nfs4_compare_super(struct super_block *sb, void *data) +/* + * Set up an NFS4 superblock + */ +static void nfs4_fill_super(struct super_block *sb) { - struct nfs_server *server = data; - struct nfs_server *old = NFS_SB(sb); - - if (strcmp(server->hostname, old->hostname) != 0) - return 0; - if (strcmp(server->mnt_path, old->mnt_path) != 0) - return 0; - return 1; + sb->s_time_gran = 1; + sb->s_op = &nfs4_sops; + nfs_initialise_sb(sb); } -static void * -nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) +static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) { void *p = NULL; @@ -1297,14 +787,22 @@ nfs_copy_user_string(char *dst, struct n return dst; } +/* + * Get the superblock for an NFS4 mountpoint + */ static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - int error; - struct nfs_server *server; - struct super_block *s; struct nfs4_mount_data *data = raw_data; + struct super_block *s; + struct nfs_server *server; + struct sockaddr_in addr; + rpc_authflavor_t authflavour; + struct nfs_fh mntfh; + struct dentry *mntroot; + char *mntpath = NULL, *hostname = NULL, ip_addr[16]; void *p; + int error; if (data == NULL) { dprintk("%s: missing data argument\n", __FUNCTION__); @@ -1315,84 +813,107 @@ static int nfs4_get_sb(struct file_syste return -EINVAL; } - server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (!server) - return -ENOMEM; - /* Zero out the NFS state stuff */ - init_nfsv4_state(server); - server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + /* We now require that the mount process passes the remote address */ + if (data->host_addrlen != sizeof(addr)) + return -EINVAL; + + if (copy_from_user(&addr, data->host_addr, sizeof(addr))) + return -EFAULT; + + if (addr.sin_family != AF_INET || + addr.sin_addr.s_addr == INADDR_ANY + ) { + dprintk("%s: mount program didn't pass remote IP address!\n", + __FUNCTION__); + return -EINVAL; + } + + /* Grab the authentication type */ + authflavour = RPC_AUTH_UNIX; + if 
(data->auth_flavourlen != 0) { + if (data->auth_flavourlen != 1) { + dprintk("%s: Invalid number of RPC auth flavours %d.\n", + __FUNCTION__, data->auth_flavourlen); + error = -EINVAL; + goto out_err_noserver; + } + + if (copy_from_user(&authflavour, data->auth_flavours, + sizeof(authflavour))) { + error = -EFAULT; + goto out_err_noserver; + } + } p = nfs_copy_user_string(NULL, &data->hostname, 256); if (IS_ERR(p)) goto out_err; - server->hostname = p; + hostname = p; p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); if (IS_ERR(p)) goto out_err; - server->mnt_path = p; + mntpath = p; + + dprintk("MNTPATH: %s\n", mntpath); - p = nfs_copy_user_string(server->ip_addr, &data->client_addr, - sizeof(server->ip_addr) - 1); + p = nfs_copy_user_string(ip_addr, &data->client_addr, + sizeof(ip_addr) - 1); if (IS_ERR(p)) goto out_err; - /* We now require that the mount process passes the remote address */ - if (data->host_addrlen != sizeof(server->addr)) { - error = -EINVAL; - goto out_free; - } - if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) { - error = -EFAULT; - goto out_free; - } - if (server->addr.sin_family != AF_INET || - server->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote IP address!\n", - __FUNCTION__); - error = -EINVAL; - goto out_free; - } - - /* Fire up rpciod if not yet running */ - error = rpciod_up(); - if (error < 0) { - dprintk("%s: couldn't start rpciod! Error = %d\n", - __FUNCTION__, error); - goto out_free; + /* Get a volume representation */ + server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, + authflavour, &mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; } - s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); - + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(fs_type, nfs_compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_free; } - if (s->s_root) { - kfree(server->mnt_path); - kfree(server->hostname); - kfree(server); - return simple_set_mnt(mnt, s); - } + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; - s->s_flags = flags; + nfs4_fill_super(s); + } else { + nfs_free_server(server); + } - error = nfs4_fill_super(s, data, flags & MS_SILENT ? 
1 : 0); - if (error) { - up_write(&s->s_umount); - deactivate_super(s); - return error; + mntroot = nfs4_get_root(s, &mntfh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; } + s->s_flags |= MS_ACTIVE; - return simple_set_mnt(mnt, s); + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + kfree(mntpath); + kfree(hostname); + return 0; + out_err: error = PTR_ERR(p); + goto out_err_noserver; + out_free: - kfree(server->mnt_path); - kfree(server->hostname); - kfree(server); + nfs_free_server(server); +out_err_noserver: + kfree(mntpath); + kfree(hostname); return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + goto out_err_noserver; } static void nfs4_kill_super(struct super_block *sb) @@ -1403,135 +924,140 @@ static void nfs4_kill_super(struct super kill_anon_super(sb); nfs4_renewd_prepare_shutdown(server); + nfs_free_server(server); +} + +/* + * Clone an NFS4 server record on xdev traversal (FSID-change) + */ +static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) +{ + struct nfs_clone_mount *data = raw_data; + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + int error; + + dprintk("--> nfs4_xdev_get_sb()\n"); + + /* create a new volume representation */ + server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; + } + + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; + } - if (server->client != NULL && !IS_ERR(server->client)) - rpc_shutdown_client(server->client); + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } - destroy_nfsv4_state(server); + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs4_clone_super(s, data->sb); + } + + mntroot = nfs4_get_root(s, data->fh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } - rpciod_down(); + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + + dprintk("<-- nfs4_xdev_get_sb() = 0\n"); + return 0; + +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error); + return error; - nfs_free_iostats(server->io_stats); - kfree(server->hostname); - kfree(server); - nfs_release_automount_timer(); +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error); + return error; } /* - * Constructs the SERVER-side path + * Create an NFS4 server record on referral traversal */ -static inline char *nfs4_dup_path(const struct dentry *dentry) +static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) { - char *page = (char *) __get_free_page(GFP_USER); - char *path; + struct nfs_clone_mount *data = raw_data; + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + struct nfs_fh mntfh; + int error; - path = nfs4_path(dentry, page, PAGE_SIZE); - if (!IS_ERR(path)) { - int len = PAGE_SIZE + page - path; - char *tmp = path; + dprintk("--> nfs4_referral_get_sb()\n"); - path = kmalloc(len, GFP_KERNEL); - if (path) - memcpy(path, tmp, len); - else - path = ERR_PTR(-ENOMEM); + /* create a new volume representation */ 
+ server = nfs4_create_referral_server(data, &mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; } - free_page((unsigned long)page); - return path; -} -static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - const struct dentry *dentry = data->dentry; - struct nfs4_client *clp = server->nfs4_state; - struct super_block *sb; - - server->fsid = data->fattr->fsid; - nfs_copy_fh(&server->fh, data->fh); - server->mnt_path = nfs4_dup_path(dentry); - if (IS_ERR(server->mnt_path)) { - sb = (struct super_block *)server->mnt_path; - goto err; + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; } - sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); - if (IS_ERR(sb) || sb->s_root) - goto free_path; - nfs4_server_capabilities(server, &server->fh); - - down_write(&clp->cl_sem); - atomic_inc(&clp->cl_count); - list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); - up_write(&clp->cl_sem); - return sb; -free_path: - kfree(server->mnt_path); -err: - server->mnt_path = NULL; - return sb; -} -static int nfs_clone_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) -{ - struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server, mnt); -} + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } -static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - struct super_block *sb = ERR_PTR(-ENOMEM); - int len; - - len = strlen(data->mnt_path) + 1; - server->mnt_path = kmalloc(len, GFP_KERNEL); - if (server->mnt_path == NULL) - goto err; - memcpy(server->mnt_path, data->mnt_path, len); - memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in)); - - sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); - if (IS_ERR(sb) || sb->s_root) - goto free_path; - return sb; -free_path: - kfree(server->mnt_path); -err: - server->mnt_path = NULL; - return sb; -} + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs4_fill_super(s); + } -static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data) -{ - struct nfs_server *server = NFS_SB(sb); - struct rpc_timeout timeparms; - int proto, timeo, retrans; - void *err; - - proto = IPPROTO_TCP; - /* Since we are following a referral and there may be alternatives, - set the timeouts and retries to low values */ - timeo = 2; - retrans = 1; - nfs_init_timeout_values(&timeparms, proto, timeo, retrans); - - server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor); - if (IS_ERR((err = server->client))) - goto out_err; + mntroot = nfs4_get_root(s, data->fh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } - sb->s_time_gran = 1; - sb->s_op = &nfs4_sops; - err = ERR_PTR(nfs_sb_init(sb, data->authflavor)); - if (!IS_ERR(err)) - return server; -out_err: - return (struct nfs_server *)err; -} + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; -static int nfs_referral_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) -{ - struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs4_referral_sb, 
nfs4_referral_server, mnt); + dprintk("<-- nfs4_referral_get_sb() = 0\n"); + return 0; + +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error); + return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); + return error; } -#endif +#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 2fe3403..7a25a6d 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -14,6 +14,7 @@ #include #include #include "callback.h" +#include "internal.h" static const int nfs_set_port_min = 0; static const int nfs_set_port_max = 65535; @@ -55,6 +56,48 @@ #endif .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies, }, +#ifdef CONFIG_NFS_FSCACHE + { + .ctl_name = CTL_UNNUMBERED, + .procname = "fscache_from_error", + .data = &nfs_fscache_from_error, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "fscache_to_error", + .data = &nfs_fscache_to_error, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "fscache_uncache_page", + .data = &nfs_fscache_uncache_page, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "fscache_to_pages", + .data = &nfs_fscache_to_pages, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "fscache_from_pages", + .data = &nfs_fscache_from_pages, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif { .ctl_name = 0 } }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 86bac6a..cd8d972 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -63,6 +63,7 @@ #include #include "delegation.h" #include "iostat.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -163,6 +164,9 @@ static void nfs_grow_file(struct page *p return; nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); i_size_write(inode, end); +#ifdef FSCACHE_WRITE_SUPPORT + nfs_set_fscsize(NFS_SERVER(inode), NFS_I(inode), end); +#endif } /* We can set the PG_uptodate flag if we see that a write request @@ -342,6 +346,9 @@ do_it: err = -EBADF; goto out; } + + nfs_writepage_to_fscache(inode, page); + lock_kernel(); if (!IS_SYNC(inode) && inode_referenced) { err = nfs_writepage_async(ctx, inode, page, 0, offset); @@ -424,7 +431,7 @@ static int nfs_inode_add_request(struct if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } - SetPagePrivate(req->wb_page); + SetPageNfsWriting(req->wb_page); nfsi->npages++; atomic_inc(&req->wb_count); return 0; @@ -441,7 +448,7 @@ static void nfs_inode_remove_request(str BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&nfsi->req_lock); - ClearPagePrivate(req->wb_page); + ClearPageNfsWriting(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; if (!nfsi->npages) { @@ -1280,7 +1287,7 @@ #if defined(CONFIG_NFS_V3) || defined(CO if (time_before(complain, jiffies)) { dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", - NFS_SERVER(data->inode)->hostname, + NFS_SERVER(data->inode)->nfs_client->cl_hostname, resp->verf->committed, argp->stable); complain = jiffies + 300 * HZ; } diff --git a/fs/open.c b/fs/open.c index 303f06d..3cf6c00 100644 --- a/fs/open.c +++ b/fs/open.c @@ -974,6 +974,26 @@ struct file 
*dentry_open(struct dentry * EXPORT_SYMBOL(dentry_open); /* + * open a specifically in-kernel file + */ +struct file *dentry_open_kernel(struct dentry *dentry, struct vfsmount *mnt, int flags) +{ + int error; + struct file *f; + + error = -ENFILE; + f = get_empty_kernel_filp(FKFLAGS_NO_ENFILE); + if (f == NULL) { + dput(dentry); + mntput(mnt); + return ERR_PTR(error); + } + + return __dentry_open(dentry, mnt, flags, f, NULL); +} +EXPORT_SYMBOL_GPL(dentry_open_kernel); + +/* * Find an empty file descriptor entry, and mark it busy. */ int get_unused_fd(void) diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 12dfdcf..df64f04 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2351,7 +2351,7 @@ static int reiserfs_write_full_page(stru struct buffer_head *head, *bh; int partial = 0; int nr = 0; - int checked = PageChecked(page); + int checked = PageFsMisc(page); struct reiserfs_transaction_handle th; struct super_block *s = inode->i_sb; int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; @@ -2420,7 +2420,7 @@ static int reiserfs_write_full_page(stru * blocks we're going to log */ if (checked) { - ClearPageChecked(page); + ClearPageFsMisc(page); reiserfs_write_lock(s); error = journal_begin(&th, s, bh_per_page + 1); if (error) { @@ -2801,7 +2801,7 @@ static void reiserfs_invalidatepage(stru BUG_ON(!PageLocked(page)); if (offset == 0) - ClearPageChecked(page); + ClearPageFsMisc(page); if (!page_has_buffers(page)) goto out; @@ -2842,7 +2842,7 @@ static int reiserfs_set_page_dirty(struc { struct inode *inode = page->mapping->host; if (reiserfs_file_data_log(inode)) { - SetPageChecked(page); + SetPageFsMisc(page); return __set_page_dirty_nobuffers(page); } return __set_page_dirty_buffers(page); @@ -2865,7 +2865,7 @@ static int reiserfs_releasepage(struct p struct buffer_head *bh; int ret = 1; - WARN_ON(PageChecked(page)); + WARN_ON(PageFsMisc(page)); spin_lock(&j->j_dirty_buffers_lock); head = page_buffers(page); bh = head; diff --git a/fs/super.c b/fs/super.c index 6d4e817..3bf8e5f 100644 --- a/fs/super.c +++ b/fs/super.c @@ -228,17 +228,17 @@ static int grab_super(struct super_block * that need destruction out of superblock, call generic_shutdown_super() * and release aforementioned objects. Note: dentries and inodes _are_ * taken care of and do not need specific handling. + * + * Upon calling this function, the filesystem may no longer alter or + * rearrange the set of dentries belonging to this super_block, nor may it + * change the attachments of dentries to inodes. 
*/ void generic_shutdown_super(struct super_block *sb) { - struct dentry *root = sb->s_root; struct super_operations *sop = sb->s_op; - if (root) { - sb->s_root = NULL; - shrink_dcache_parent(root); - shrink_dcache_sb(sb); - dput(root); + if (sb->s_root) { + shrink_dcache_for_umount(sb); fsync_super(sb); lock_super(sb); sb->s_flags &= ~MS_ACTIVE; diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 7f0a0aa..e04327c 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -135,7 +135,7 @@ static void ufs_check_page(struct page * if (offs != limit) goto Eend; out: - SetPageChecked(page); + SetPageFsMisc(page); return; /* Too bad, we had an error */ @@ -173,7 +173,7 @@ Eend: "offset=%lu", dir->i_ino, (page->index< + +#define NR_MAXCACHES BITS_PER_LONG + +struct fscache_cache; +struct fscache_cache_ops; +struct fscache_object; + +/* + * cache tag definition + */ +struct fscache_cache_tag { + struct list_head link; + struct fscache_cache *cache; /* cache referred to by this tag */ + atomic_t usage; + char name[0]; /* tag name */ +}; + +/* + * cache definition + */ +struct fscache_cache { + struct fscache_cache_ops *ops; + struct fscache_cache_tag *tag; /* tag representing this cache */ + struct list_head link; /* link in list of caches */ + struct rw_semaphore withdrawal_sem; /* withdrawal control sem */ + size_t max_index_size; /* maximum size of index data */ + char identifier[32]; /* cache label */ + + /* node management */ + struct list_head object_list; /* list of data/index objects */ + spinlock_t object_list_lock; + struct fscache_object *fsdef; /* object for the fsdef index */ + unsigned long flags; +#define FSCACHE_IOERROR 0 /* cache stopped on I/O error */ +}; + +extern void fscache_init_cache(struct fscache_cache *cache, + struct fscache_cache_ops *ops, + const char *idfmt, + ...) 
__attribute__ ((format (printf,3,4))); + +extern int fscache_add_cache(struct fscache_cache *cache, + struct fscache_object *fsdef, + const char *tagname); +extern void fscache_withdraw_cache(struct fscache_cache *cache); + +extern void fscache_io_error(struct fscache_cache *cache); + +/*****************************************************************************/ +/* + * cache operations + */ +struct fscache_cache_ops { + /* name of cache provider */ + const char *name; + + /* look up the object for a cookie, creating it on disc if necessary */ + struct fscache_object *(*lookup_object)(struct fscache_cache *cache, + struct fscache_object *parent, + struct fscache_cookie *cookie); + + /* increment the usage count on this object (may fail if unmounting) */ + struct fscache_object *(*grab_object)(struct fscache_object *object); + + /* lock a semaphore on an object */ + void (*lock_object)(struct fscache_object *object); + + /* unlock a semaphore on an object */ + void (*unlock_object)(struct fscache_object *object); + + /* pin an object in the cache */ + int (*pin_object)(struct fscache_object *object); + + /* unpin an object in the cache */ + void (*unpin_object)(struct fscache_object *object); + + /* store the updated auxilliary data on an object */ + void (*update_object)(struct fscache_object *object); + + /* dispose of a reference to an object */ + void (*put_object)(struct fscache_object *object); + + /* sync a cache */ + void (*sync_cache)(struct fscache_cache *cache); + + /* set the data size of an object */ + int (*set_i_size)(struct fscache_object *object, loff_t i_size); + + /* reserve space for an object's data and associated metadata */ + int (*reserve_space)(struct fscache_object *object, loff_t i_size); + + /* request a backing block for a page be read or allocated in the + * cache */ + int (*read_or_alloc_page)(struct fscache_object *object, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + unsigned long gfp); + + /* request backing blocks for a list of pages be read or allocated in + * the cache */ + int (*read_or_alloc_pages)(struct fscache_object *object, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t end_io_func, + void *context, + unsigned long gfp); + + /* request a backing block for a page be allocated in the cache so that + * it can be written directly */ + int (*allocate_page)(struct fscache_object *object, + struct page *page, + unsigned long gfp); + + /* write a page to its backing block in the cache */ + int (*write_page)(struct fscache_object *object, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + unsigned long gfp); + + /* write several pages to their backing blocks in the cache */ + int (*write_pages)(struct fscache_object *object, + struct pagevec *pagevec, + fscache_rw_complete_t end_io_func, + void *context, + unsigned long gfp); + + /* detach backing block from a bunch of pages */ + void (*uncache_pages)(struct fscache_object *object, + struct pagevec *pagevec); + + /* dissociate a cache from all the pages it was backing */ + void (*dissociate_pages)(struct fscache_cache *cache); +}; + +/*****************************************************************************/ +/* + * data file or index object cookie + * - a file will only appear in one cache + * - a request to cache a file may or may not be honoured, subject to + * constraints such as disc space + * - indexes files are created on disc just-in-time + */ +struct fscache_cookie { + atomic_t 
usage; /* number of users of this cookie */ + atomic_t children; /* number of children of this cookie */ + struct rw_semaphore sem; /* list creation vs scan lock */ + struct hlist_head backing_objects; /* object(s) backing this file/index */ + struct fscache_cookie_def *def; /* definition */ + struct fscache_cookie *parent; /* parent of this entry */ + struct fscache_netfs *netfs; /* owner network fs definition */ + void *netfs_data; /* back pointer to netfs */ +}; + +extern struct fscache_cookie fscache_fsdef_index; + +/*****************************************************************************/ +/* + * on-disc cache file or index handle + */ +struct fscache_object { + unsigned long flags; +#define FSCACHE_OBJECT_RELEASING 0 /* T if object is being released */ +#define FSCACHE_OBJECT_RECYCLING 1 /* T if object is being retired */ +#define FSCACHE_OBJECT_WITHDRAWN 2 /* T if object has been withdrawn */ + + struct list_head cache_link; /* link in cache->object_list */ + struct hlist_node cookie_link; /* link in cookie->backing_objects */ + struct fscache_cache *cache; /* cache that supplied this object */ + struct fscache_cookie *cookie; /* netfs's file/index object */ +}; + +static inline +void fscache_object_init(struct fscache_object *object) +{ + object->flags = 0; + INIT_LIST_HEAD(&object->cache_link); + INIT_HLIST_NODE(&object->cookie_link); + object->cache = NULL; + object->cookie = NULL; +} + +/* find the parent index object for a object */ +static inline +struct fscache_object *fscache_find_parent_object(struct fscache_object *object) +{ + struct fscache_object *parent; + struct fscache_cookie *cookie = object->cookie; + struct fscache_cache *cache = object->cache; + struct hlist_node *_p; + + hlist_for_each_entry(parent, _p, + &cookie->parent->backing_objects, + cookie_link + ) { + if (parent->cache == cache) + return parent; + } + + return NULL; +} + +/* get an extra reference to a context */ +static inline +void *fscache_get_context(struct fscache_cookie *cookie, void *context) +{ + if (cookie->def->get_context) + cookie->def->get_context(cookie->netfs_data, context); + return context; +} + +/* release an extra reference to a context */ +static inline +void fscache_put_context(struct fscache_cookie *cookie, void *context) +{ + if (cookie->def->put_context) + cookie->def->put_context(cookie->netfs_data, context); +} + +#endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h new file mode 100644 index 0000000..b9697f9 --- /dev/null +++ b/include/linux/fscache.h @@ -0,0 +1,496 @@ +/* fscache.h: general filesystem caching interface + * + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * NOTE!!! See: + * + * Documentation/filesystems/caching/netfs-api.txt + * + * for a description of the network filesystem interface declared here. 
+ */ + +#ifndef _LINUX_FSCACHE_H +#define _LINUX_FSCACHE_H + +#include +#include +#include +#include +#include + +struct pagevec; +struct fscache_cache_tag; +struct fscache_cookie; +struct fscache_netfs; +struct fscache_netfs_operations; + +typedef void (*fscache_rw_complete_t)(struct page *page, + void *context, + int error); + +/* result of index entry consultation */ +typedef enum { + FSCACHE_CHECKAUX_OKAY, /* entry okay as is */ + FSCACHE_CHECKAUX_NEEDS_UPDATE, /* entry requires update */ + FSCACHE_CHECKAUX_OBSOLETE, /* entry requires deletion */ +} fscache_checkaux_t; + +/*****************************************************************************/ +/* + * fscache cookie definition + */ +struct fscache_cookie_def +{ + /* name of cookie type */ + char name[16]; + + /* cookie type */ + uint8_t type; +#define FSCACHE_COOKIE_TYPE_INDEX 0 +#define FSCACHE_COOKIE_TYPE_DATAFILE 1 + + /* select the cache into which to insert an entry in this index + * - optional + * - should return a cache identifier or NULL to cause the cache to be + * inherited from the parent if possible or the first cache picked + * for a non-index file if not + */ + struct fscache_cache_tag *(*select_cache)(const void *parent_netfs_data, + const void *cookie_netfs_data); + + /* get an index key + * - should store the key data in the buffer + * - should return the amount of amount stored + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ + uint16_t (*get_key)(const void *cookie_netfs_data, + void *buffer, + uint16_t bufmax); + + /* get certain file attributes from the netfs data + * - this function can be absent for an index + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ + void (*get_attr)(const void *cookie_netfs_data, uint64_t *size); + + /* get the auxilliary data from netfs data + * - this function can be absent if the index carries no state data + * - should store the auxilliary data in the buffer + * - should return the amount of amount stored + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ + uint16_t (*get_aux)(const void *cookie_netfs_data, + void *buffer, + uint16_t bufmax); + + /* consult the netfs about the state of an object + * - this function can be absent if the index carries no state data + * - the netfs data from the cookie being used as the target is + * presented, as is the auxilliary data + */ + fscache_checkaux_t (*check_aux)(void *cookie_netfs_data, + const void *data, + uint16_t datalen); + + /* get an extra reference on a read context + * - this function can be absent if the completion function doesn't + * require a context + */ + void (*get_context)(void *cookie_netfs_data, void *context); + + /* release an extra reference on a read context + * - this function can be absent if the completion function doesn't + * require a context + */ + void (*put_context)(void *cookie_netfs_data, void *context); + + /* indicate pages that now have cache metadata retained + * - this function should mark the specified pages as now being cached + */ + void (*mark_pages_cached)(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec); + + /* indicate the cookie is no longer cached + * - this function is called when the backing store currently caching + * a cookie is removed + * - the netfs should use this to clean up any markers indicating + * cached pages + * - 
this is mandatory for any object that may have data + */ + void (*now_uncached)(void *cookie_netfs_data); +}; + +/* pattern used to fill dead space in an index entry */ +#define FSCACHE_INDEX_DEADFILL_PATTERN 0x79 + +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern struct fscache_cookie *__fscache_acquire_cookie(struct fscache_cookie *parent, + struct fscache_cookie_def *def, + void *netfs_data); + +extern void __fscache_relinquish_cookie(struct fscache_cookie *cookie, + int retire); + +extern void __fscache_update_cookie(struct fscache_cookie *cookie); +#endif + +static inline +struct fscache_cookie *fscache_acquire_cookie(struct fscache_cookie *parent, + struct fscache_cookie_def *def, + void *netfs_data) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (parent) + return __fscache_acquire_cookie(parent, def, netfs_data); +#endif + return NULL; +} + +static inline +void fscache_relinquish_cookie(struct fscache_cookie *cookie, + int retire) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + __fscache_relinquish_cookie(cookie, retire); +#endif +} + +static inline +void fscache_update_cookie(struct fscache_cookie *cookie) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + __fscache_update_cookie(cookie); +#endif +} + +/*****************************************************************************/ +/* + * pin or unpin a cookie in a cache + * - only available for data cookies + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_pin_cookie(struct fscache_cookie *cookie); +extern void __fscache_unpin_cookie(struct fscache_cookie *cookie); +#endif + +static inline +int fscache_pin_cookie(struct fscache_cookie *cookie) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_pin_cookie(cookie); +#endif + return -ENOBUFS; +} + +static inline +void fscache_unpin_cookie(struct fscache_cookie *cookie) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + __fscache_unpin_cookie(cookie); +#endif +} + +/*****************************************************************************/ +/* + * fscache cached network filesystem type + * - name, version and ops must be filled in before registration + * - all other fields will be set during registration + */ +struct fscache_netfs +{ + uint32_t version; /* indexing version */ + const char *name; /* filesystem name */ + struct fscache_cookie *primary_index; + struct fscache_netfs_operations *ops; + struct list_head link; /* internal link */ +}; + +struct fscache_netfs_operations +{ +}; + +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_register_netfs(struct fscache_netfs *netfs); +extern void __fscache_unregister_netfs(struct fscache_netfs *netfs); +#endif + +static inline +int fscache_register_netfs(struct fscache_netfs *netfs) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + return __fscache_register_netfs(netfs); +#else + return 0; +#endif +} + +static inline +void fscache_unregister_netfs(struct fscache_netfs *netfs) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + __fscache_unregister_netfs(netfs); +#endif +} + +/*****************************************************************************/ +/* + * look up a cache tag + * - cache tags are used to select specific caches in which to cache indexes + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern struct 
fscache_cache_tag *__fscache_lookup_cache_tag(const char *name); +extern void __fscache_release_cache_tag(struct fscache_cache_tag *tag); +#endif + +static inline +struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + return __fscache_lookup_cache_tag(name); +#else + return NULL; +#endif +} + +static inline +void fscache_release_cache_tag(struct fscache_cache_tag *tag) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + __fscache_release_cache_tag(tag); +#endif +} + +/*****************************************************************************/ +/* + * set the data size on a cached object + * - no pages beyond the end of the object will be accessible + * - returns -ENOBUFS if the file is not backed + * - returns -ENOSPC if a pinned file of that size can't be stored + * - returns 0 if okay + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_set_i_size(struct fscache_cookie *cookie, loff_t i_size); +#endif + +static inline +int fscache_set_i_size(struct fscache_cookie *cookie, loff_t i_size) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_set_i_size(cookie, i_size); +#endif + return -ENOBUFS; +} + +/*****************************************************************************/ +/* + * reserve data space for a cached object + * - returns -ENOBUFS if the file is not backed + * - returns -ENOSPC if there isn't enough space to honour the reservation + * - returns 0 if okay + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_reserve_space(struct fscache_cookie *cookie, loff_t size); +#endif + +static inline +int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_reserve_space(cookie, size); +#endif + return -ENOBUFS; +} + +/*****************************************************************************/ +/* + * read a page from the cache or allocate a block in which to store it + * - if the page is not backed by a file: + * - -ENOBUFS will be returned and nothing more will be done + * - else if the page is backed by a block in the cache: + * - a read will be started which will call end_io_func on completion + * - else if the page is unbacked: + * - a block will be allocated + * - -ENODATA will be returned + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); +#endif + +static inline +int fscache_read_or_alloc_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_read_or_alloc_page(cookie, page, end_io_func, + context, gfp); +#endif + return -ENOBUFS; +} + +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); +#endif + +static inline +int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t 
end_io_func, + void *context, + gfp_t gfp) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_read_or_alloc_pages(cookie, mapping, pages, + nr_pages, end_io_func, + context, gfp); +#endif + return -ENOBUFS; +} + +/* + * allocate a block in which to store a page + * - if the page is not backed by a file: + * - -ENOBUFS will be returned and nothing more will be done + * - else + * - a block will be allocated if there isn't one + * - 0 will be returned + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_alloc_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp); +#endif + +static inline +int fscache_alloc_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_alloc_page(cookie, page, gfp); +#endif + return -ENOBUFS; +} + +/* + * request a page be stored in the cache + * - this request may be ignored if no cache block is currently allocated, in + * which case it: + * - returns -ENOBUFS + * - if a cache block was already allocated: + * - a BIO will be dispatched to write the page (end_io_func will be called + * from the completion function) + * - returns 0 + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_write_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); + +extern int __fscache_write_pages(struct fscache_cookie *cookie, + struct pagevec *pagevec, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); +#endif + +static inline +int fscache_write_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_write_page(cookie, page, end_io_func, + context, gfp); +#endif + return -ENOBUFS; +} + +static inline +int fscache_write_pages(struct fscache_cookie *cookie, + struct pagevec *pagevec, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_write_pages(cookie, pagevec, end_io_func, + context, gfp); +#endif + return -ENOBUFS; +} + +/* + * indicate that caching is no longer required on a page + * - note: cannot cancel any outstanding BIOs between this page and the cache + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern void __fscache_uncache_page(struct fscache_cookie *cookie, + struct page *page); +extern void __fscache_uncache_pages(struct fscache_cookie *cookie, + struct pagevec *pagevec); +#endif + +static inline +void fscache_uncache_page(struct fscache_cookie *cookie, + struct page *page) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + __fscache_uncache_page(cookie, page); +#endif +} + +static inline +void fscache_uncache_pagevec(struct fscache_cookie *cookie, + struct pagevec *pagevec) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + __fscache_uncache_pages(cookie, pagevec); +#endif +} + +#endif /* _LINUX_FSCACHE_H */ diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h index 26b4c83..15199cc 100644 --- a/include/linux/nfs4_mount.h +++ b/include/linux/nfs4_mount.h @@ -65,6 +65,7 @@ #define NFS4_MOUNT_INTR 0x0002 /* 1 */ #define NFS4_MOUNT_NOCTO 0x0010 /* 1 */ #define 
NFS4_MOUNT_NOAC 0x0020 /* 1 */ #define NFS4_MOUNT_STRICTLOCK 0x1000 /* 1 */ +#define NFS4_MOUNT_FSCACHE 0x4000 /* 1 */ #define NFS4_MOUNT_FLAGMASK 0xFFFF #endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 55ea853..71cf935 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -29,6 +29,7 @@ #include #include #include +#include /* * Enable debugging support for nfs client. @@ -180,6 +181,9 @@ #ifdef CONFIG_NFS_V4 int delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ +#ifdef CONFIG_NFS_FSCACHE + struct fscache_cookie *fscache; +#endif struct inode vfs_inode; }; @@ -209,7 +213,7 @@ #define NFS_SB(s) ((struct nfs_server * #define NFS_FH(inode) (&NFS_I(inode)->fh) #define NFS_SERVER(inode) (NFS_SB(inode->i_sb)) #define NFS_CLIENT(inode) (NFS_SERVER(inode)->client) -#define NFS_PROTO(inode) (NFS_SERVER(inode)->rpc_ops) +#define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops) #define NFS_ADDR(inode) (RPC_PEERADDR(NFS_CLIENT(inode))) #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) #define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) @@ -581,6 +585,8 @@ #define NFSDBG_XDR 0x0020 #define NFSDBG_FILE 0x0040 #define NFSDBG_ROOT 0x0080 #define NFSDBG_CALLBACK 0x0100 +#define NFSDBG_CLIENT 0x0200 +#define NFSDBG_FSCACHE 0x0400 #define NFSDBG_ALL 0xFFFF #ifdef __KERNEL__ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6b4a13c..d0c926f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -3,17 +3,88 @@ #define _NFS_FS_SB #include #include +#include struct nfs_iostats; /* + * The nfs_client identifies our client state to the server. + */ +struct nfs_client { + atomic_t cl_count; + int cl_cons_state; /* current construction state (-ve: init error) */ +#define NFS_CS_READY 0 /* ready to be used */ +#define NFS_CS_INITING 1 /* busy initialising */ + int cl_nfsversion; /* NFS protocol version */ + rpc_authflavor_t cl_authflavour; + unsigned long cl_svcs_state; /* NFS services state */ +#define NFS_CS_RPCIOD 0 /* - rpciod started */ +#define NFS_CS_CALLBACK 1 /* - callback started */ +#define NFS_CS_IDMAP 2 /* - idmap started */ + struct sockaddr_in cl_addr; /* server identifier */ + char * cl_hostname; /* hostname of server */ + struct list_head cl_share_link; /* link in global client list */ + struct list_head cl_superblocks; /* List of nfs_server structs */ + + struct rpc_clnt * cl_rpcclient; + struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ + unsigned long retrans_timeo; /* retransmit timeout */ + unsigned int retrans_count; /* number of retransmit tries */ + +#ifdef CONFIG_NFS_V4 + u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; + unsigned long cl_state; + + u32 cl_lockowner_id; + + /* + * The following rwsem ensures exclusive access to the server + * while we recover the state following a lease expiration. + */ + struct rw_semaphore cl_sem; + + struct list_head cl_delegations; + struct list_head cl_state_owners; + struct list_head cl_unused; + int cl_nunused; + spinlock_t cl_lock; + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + + struct rpc_wait_queue cl_rpcwaitq; + + /* used for the setclientid verifier */ + struct timespec cl_boot_time; + + /* idmapper */ + struct idmap * cl_idmap; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. 
+ */ + char cl_ipaddr[16]; + unsigned char cl_id_uniquifier; +#endif + +#ifdef CONFIG_NFS_FSCACHE + struct fscache_cookie *fscache; /* client index cache cookie */ +#endif +}; + +/* * NFS client parameters stored in the superblock. */ struct nfs_server { + struct nfs_client * nfs_client; /* shared client and NFS4 state */ + struct list_head client_link; /* List of other nfs_server structs + * that share the same client + */ + struct list_head master_link; /* link in master servers list */ struct rpc_clnt * client; /* RPC client handle */ - struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ - struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; int flags; /* various flags */ @@ -29,24 +100,14 @@ struct nfs_server { unsigned int acregmax; unsigned int acdirmin; unsigned int acdirmax; - unsigned long retrans_timeo; /* retransmit timeout */ - unsigned int retrans_count; /* number of retransmit tries */ unsigned int namelen; - char * hostname; /* remote hostname */ - struct nfs_fh fh; - struct sockaddr_in addr; + struct nfs_fsid fsid; + __u64 maxfilesize; /* maximum file size */ unsigned long mount_time; /* when this fs was mounted */ + dev_t s_dev; /* superblock dev numbers */ + #ifdef CONFIG_NFS_V4 - /* Our own IP address, as a null-terminated string. - * This is used to generate the clientid, and the callback address. - */ - char ip_addr[16]; - char * mnt_path; - struct nfs4_client * nfs4_state; /* all NFSv4 state starts here */ - struct list_head nfs4_siblings; /* List of other nfs_server structs - * that share the same clientid - */ u32 attr_bitmask[2];/* V4 bitmask representing the set of attributes supported on this filesystem */ @@ -54,6 +115,7 @@ #ifdef CONFIG_NFS_V4 that are supported on this filesystem */ #endif + void (*destroy)(struct nfs_server *); }; /* Server capabilities */ diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 102e560..15a9f3b 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -62,15 +62,15 @@ struct idmap_msg { #ifdef __KERNEL__ /* Forward declaration to make this header independent of others */ -struct nfs4_client; +struct nfs_client; -void nfs_idmap_new(struct nfs4_client *); -void nfs_idmap_delete(struct nfs4_client *); +int nfs_idmap_new(struct nfs_client *); +void nfs_idmap_delete(struct nfs_client *); -int nfs_map_name_to_uid(struct nfs4_client *, const char *, size_t, __u32 *); -int nfs_map_group_to_gid(struct nfs4_client *, const char *, size_t, __u32 *); -int nfs_map_uid_to_name(struct nfs4_client *, __u32, char *); -int nfs_map_gid_to_group(struct nfs4_client *, __u32, char *); +int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *); +int nfs_map_group_to_gid(struct nfs_client *, const char *, size_t, __u32 *); +int nfs_map_uid_to_name(struct nfs_client *, __u32, char *); +int nfs_map_gid_to_group(struct nfs_client *, __u32, char *); extern unsigned int nfs_idmap_cache_timeout; #endif /* __KERNEL__ */ diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index 659c754..278bb4e 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -61,6 +61,7 @@ #define NFS_MOUNT_BROKEN_SUID 0x0400 /* #define NFS_MOUNT_NOACL 0x0800 /* 4 */ #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ +#define NFS_MOUNT_FSCACHE 0x4000 #define NFS_MOUNT_FLAGMASK 0xFFFF 
#endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2d3fb64..24ceac1 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -770,6 +770,9 @@ struct nfs_rpc_ops { int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); + int (*lookupfh)(struct nfs_server *, struct nfs_fh *, + struct qstr *, struct nfs_fh *, + struct nfs_fattr *); int (*getattr) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); int (*setattr) (struct dentry *, struct nfs_fattr *, @@ -806,6 +809,7 @@ struct nfs_rpc_ops { struct nfs_fsinfo *); int (*pathconf) (struct nfs_server *, struct nfs_fh *, struct nfs_pathconf *); + int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); void (*read_setup) (struct nfs_read_data *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5748642..6e017b7 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -71,7 +71,7 @@ #define PG_lru 5 #define PG_active 6 #define PG_slab 7 /* slab debug (Suparna wants this) */ -#define PG_checked 8 /* kill me in 2.5.. */ +#define PG_fs_misc 8 #define PG_arch_1 9 #define PG_reserved 10 #define PG_private 11 /* Has something at ->private */ @@ -161,10 +161,6 @@ #else #define PageHighMem(page) 0 /* needed to optimize away at compile time */ #endif -#define PageChecked(page) test_bit(PG_checked, &(page)->flags) -#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) -#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags) - #define PageReserved(page) test_bit(PG_reserved, &(page)->flags) #define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) #define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) @@ -263,4 +259,13 @@ static inline void set_page_writeback(st test_set_page_writeback(page); } +/* + * Filesystem-specific page bit testing + */ +#define PageFsMisc(page) test_bit(PG_fs_misc, &(page)->flags) +#define SetPageFsMisc(page) set_bit(PG_fs_misc, &(page)->flags) +#define TestSetPageFsMisc(page) test_and_set_bit(PG_fs_misc, &(page)->flags) +#define ClearPageFsMisc(page) clear_bit(PG_fs_misc, &(page)->flags) +#define TestClearPageFsMisc(page) test_and_clear_bit(PG_fs_misc, &(page)->flags) + #endif /* PAGE_FLAGS_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0a2f5d2..acc3481 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -170,6 +170,23 @@ static inline void wait_on_page_writebac extern void end_page_writeback(struct page *page); /* + * Wait for filesystem-specific page synchronisation to complete + */ +static inline void wait_on_page_fs_misc(struct page *page) +{ + if (PageFsMisc(page)) + wait_on_page_bit(page, PG_fs_misc); +} + +extern void fastcall end_page_fs_misc(struct page *page); + +/* + * permit installation of a state change monitor in the queue for a page + */ +extern void install_page_waitqueue_monitor(struct page *page, + wait_queue_t *monitor); + +/* * Fault a userspace page into pagetables. Return non-zero on a fault. * * This assumes that two userspace pages are always sufficient. 
That's diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e4b1a4d..5bbbc79 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -794,6 +794,7 @@ enum FS_AIO_NR=18, /* current system-wide number of aio requests */ FS_AIO_MAX_NR=19, /* system-wide maximum number of aio requests */ FS_INOTIFY=20, /* inotify submenu */ + FS_FILE_KERNEL=21, /* int: number of internal kernel files */ }; /* /proc/sys/fs/quota/ */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ae40ac8..61b484f 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1314,6 +1314,8 @@ #endif context->names[idx].ino = (unsigned long)-1; } +EXPORT_SYMBOL_GPL(__audit_inode_child); + /** * auditsc_get_stamp - get local copies of audit_context values * @ctx: audit_context for the task diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 362a0cc..32043a2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -52,6 +52,9 @@ #include extern int proc_nr_files(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int proc_files_kernel(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ @@ -993,6 +996,14 @@ static ctl_table fs_table[] = { .proc_handler = &proc_dointvec, }, { + .ctl_name = FS_FILE_KERNEL, + .procname = "file-kernel", + .data = &files_stat, + .maxlen = 1*sizeof(int), + .mode = 0444, + .proc_handler = &proc_files_kernel, + }, + { .ctl_name = FS_DENTRY, .procname = "dentry-state", .data = &dentry_stat, diff --git a/mm/filemap.c b/mm/filemap.c index d087fc3..2d50899 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -520,6 +520,18 @@ void fastcall wait_on_page_bit(struct pa } EXPORT_SYMBOL(wait_on_page_bit); +void install_page_waitqueue_monitor(struct page *page, wait_queue_t *monitor) +{ + wait_queue_head_t *q = page_waitqueue(page); + unsigned long flags; + + spin_lock_irqsave(&q->lock, flags); + __add_wait_queue(q, monitor); + spin_unlock_irqrestore(&q->lock, flags); +} + +EXPORT_SYMBOL_GPL(install_page_waitqueue_monitor); + /** * unlock_page - unlock a locked page * @page: the page @@ -577,6 +589,23 @@ void fastcall __lock_page(struct page *p } EXPORT_SYMBOL(__lock_page); +/* + * Note completion of filesystem specific page synchronisation + * + * This is used to allow a page to be written to a filesystem cache in the + * background without holding up the completion of readpage + */ +void fastcall end_page_fs_misc(struct page *page) +{ + smp_mb__before_clear_bit(); + if (!TestClearPageFsMisc(page)) + BUG(); + smp_mb__after_clear_bit(); + __wake_up_bit(page_waitqueue(page), &page->flags, PG_fs_misc); +} + +EXPORT_SYMBOL(end_page_fs_misc); + /** * find_get_page - find and get a page reference * @mapping: the address_space to search @@ -2220,6 +2249,97 @@ zero_length_segment: } EXPORT_SYMBOL(generic_file_buffered_write); +/* + * This writes the data from the source page to the specified page offset in + * the nominated file + * - the source page does not need to have any association with the file or the + * page offset + */ +int +generic_file_buffered_write_one_kernel_page(struct file *file, + pgoff_t index, + struct page *src) +{ + struct address_space *mapping = file->f_mapping; + struct address_space_operations *a_ops = mapping->a_ops; + struct pagevec lru_pvec; + struct page *page, *cached_page = NULL; + long status = 0; + + pagevec_init(&lru_pvec, 0); + + page = __grab_cache_page(mapping, index, &cached_page, 
&lru_pvec); + if (!page) { + BUG_ON(cached_page); + return -ENOMEM; + } + + status = a_ops->prepare_write(file, page, 0, PAGE_CACHE_SIZE); + if (unlikely(status)) { + loff_t isize = i_size_read(mapping->host); + + if (status != AOP_TRUNCATED_PAGE) + unlock_page(page); + page_cache_release(page); + if (status == AOP_TRUNCATED_PAGE) + goto sync; + + /* prepare_write() may have instantiated a few blocks outside + * i_size. Trim these off again. + */ + if ((1ULL << (index + 1)) > isize) + vmtruncate(mapping->host, isize); + goto sync; + } + + copy_highpage(page, src); + flush_dcache_page(page); + + status = a_ops->commit_write(file, page, 0, PAGE_CACHE_SIZE); + if (status == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto sync; + } + + if (status > 0) + status = 0; + + unlock_page(page); + mark_page_accessed(page); + page_cache_release(page); + if (status < 0) + return status; + + balance_dirty_pages_ratelimited(mapping); + cond_resched(); + +sync: + if (cached_page) + page_cache_release(cached_page); + + /* + * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC + */ + if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(mapping->host))) { + if (!a_ops->writepage) + status = generic_osync_inode( + mapping->host, mapping, + OSYNC_METADATA | OSYNC_DATA); + } + + /* + * If we get here for O_DIRECT writes then we must have fallen through + * to buffered writes (block instantiation inside i_size). So we sync + * the file data here, to try to honour O_DIRECT expectations. + */ + if (unlikely(file->f_flags & O_DIRECT)) + status = filemap_write_and_wait(mapping); + + pagevec_lru_add(&lru_pvec); + return status; +} +EXPORT_SYMBOL(generic_file_buffered_write_one_kernel_page); + static ssize_t __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) diff --git a/mm/migrate.c b/mm/migrate.c index 3f1e0c2..08c9fff 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -348,8 +348,8 @@ static void migrate_page_copy(struct pag SetPageUptodate(newpage); if (PageActive(page)) SetPageActive(newpage); - if (PageChecked(page)) - SetPageChecked(newpage); + if (PageFsMisc(page)) + SetPageFsMisc(newpage); if (PageMappedToDisk(page)) SetPageMappedToDisk(newpage); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 54a4f53..8bb003b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -550,7 +550,7 @@ static int prep_new_page(struct page *pa page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_referenced | 1 << PG_arch_1 | - 1 << PG_checked | 1 << PG_mappedtodisk); + 1 << PG_fs_misc | 1 << PG_mappedtodisk); set_page_private(page, 0); set_page_refcounted(page); kernel_map_pages(page, 1 << order, 1); diff --git a/mm/readahead.c b/mm/readahead.c index aa7ec42..86a40db 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -14,6 +14,7 @@ #include #include #include #include +#include void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { @@ -117,6 +118,26 @@ static inline unsigned long get_next_ra_ #define list_to_page(head) (list_entry((head)->prev, struct page, lru)) +/* + * see if a page needs releasing upon read_cache_pages() failure + * - the caller of read_cache_pages() may have set PG_private before calling, + * such as the NFS fs marking pages that are cached locally on disk, thus we + * need to give the fs a chance to clean up in the event of an error + */ +static inline void read_cache_pages_release_page(struct address_space *mapping, + struct page *page) +{ + if (PagePrivate(page)) { + if (TestSetPageLocked(page)) 
+ BUG(); + page->mapping = mapping; + try_to_release_page(page, GFP_KERNEL); + page->mapping = NULL; + unlock_page(page); + } + page_cache_release(page); +} + /** * read_cache_pages - populate an address space with some pages & start reads against them * @mapping: the address_space @@ -140,7 +161,7 @@ int read_cache_pages(struct address_spac page = list_to_page(pages); list_del(&page->lru); if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) { - page_cache_release(page); + read_cache_pages_release_page(mapping, page); continue; } ret = filler(data, page); @@ -152,7 +173,7 @@ int read_cache_pages(struct address_spac victim = list_to_page(pages); list_del(&victim->lru); - page_cache_release(victim); + read_cache_pages_release_page(mapping, victim); } break; }
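
As a rough usage sketch of the netfs-facing interface declared above in include/linux/fscache.h: a hypothetical filesystem (called "examplefs" below; all examplefs_* names, including examplefs_readpage_from_server, are invented for illustration and assume the usual container_of() inode embedding) would register itself once, hang a data-file cookie off its primary index for each inode, and try the cache before falling back to the server:

/*
 * Illustrative only: one plausible way for a netfs to drive the cookie API;
 * the examplefs_* symbols are hypothetical and not part of this patch.
 */
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/fscache.h>

struct examplefs_inode {
	u64			fileid;		/* unique file identifier */
	struct fscache_cookie	*fscache;	/* cookie for this file's data */
	struct inode		vfs_inode;
};

/* fall-back path that reads the page from the server (defined elsewhere) */
int examplefs_readpage_from_server(struct file *file, struct page *page);

/* the index key for a data file is simply its file ID */
static uint16_t examplefs_get_key(const void *cookie_netfs_data,
				  void *buffer, uint16_t bufmax)
{
	const struct examplefs_inode *ei = cookie_netfs_data;
	uint16_t len = sizeof(ei->fileid);

	if (len > bufmax)
		len = bufmax;
	memcpy(buffer, &ei->fileid, len);
	return len;
}

static struct fscache_cookie_def examplefs_file_cookie_def = {
	.name		= "examplefs.data",
	.type		= FSCACHE_COOKIE_TYPE_DATAFILE,
	.get_key	= examplefs_get_key,
};

static struct fscache_netfs_operations examplefs_cache_ops = {
};

static struct fscache_netfs examplefs_cache_netfs = {
	.name		= "examplefs",
	.version	= 0,
	.ops		= &examplefs_cache_ops,
};

/* module initialisation: ->primary_index is filled in by registration */
static int examplefs_fscache_register(void)
{
	return fscache_register_netfs(&examplefs_cache_netfs);
}

/* per-inode setup: attach a data cookie below the primary index */
static void examplefs_fscache_init_inode(struct examplefs_inode *ei)
{
	ei->fscache = fscache_acquire_cookie(
		examplefs_cache_netfs.primary_index,
		&examplefs_file_cookie_def, ei);
}

/* completion routine run when the cache finishes reading a page */
static void examplefs_readpage_from_cache_done(struct page *page,
					       void *context, int error)
{
	if (!error)
		SetPageUptodate(page);
	unlock_page(page);
}

/* ->readpage(): try the cache first, then the server */
static int examplefs_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct examplefs_inode *ei =
		container_of(inode, struct examplefs_inode, vfs_inode);
	int ret;

	ret = fscache_read_or_alloc_page(ei->fscache, page,
					 examplefs_readpage_from_cache_done,
					 NULL, GFP_KERNEL);
	switch (ret) {
	case 0:		/* read dispatched; callback will unlock the page */
		return 0;
	case -ENODATA:	/* backing block allocated but holds no data yet */
	case -ENOBUFS:	/* page not backed by the cache at all */
	default:	/* page is still locked; the server path handles it */
		return examplefs_readpage_from_server(file, page);
	}
}

On -ENODATA the netfs would, after fetching the page from the server, normally hand it back with fscache_write_page() so the freshly allocated block gets populated; when a page is evicted it calls fscache_uncache_page(), and fscache_relinquish_cookie() (with retire set if the file was deleted) drops the cookie again, per the semantics documented in the header above.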