All of the above --- Documentation/filesystems/caching/backend-api.txt | 357 +++++ Documentation/filesystems/caching/cachefiles.txt | 281 ++++ Documentation/filesystems/caching/fscache.txt | 151 ++ Documentation/filesystems/caching/netfs-api.txt | 752 +++++++++++ Documentation/sysctl/fs.txt | 6 fs/Kconfig | 49 + fs/Makefile | 2 fs/afs/cache.h | 27 fs/afs/cell.c | 109 +- fs/afs/cell.h | 16 fs/afs/cmservice.c | 2 fs/afs/dir.c | 15 fs/afs/file.c | 246 ++-- fs/afs/fsclient.c | 4 fs/afs/inode.c | 45 - fs/afs/internal.h | 25 fs/afs/main.c | 24 fs/afs/mntpt.c | 12 fs/afs/proc.c | 1 fs/afs/server.c | 3 fs/afs/vlocation.c | 179 ++- fs/afs/vnode.c | 248 +++- fs/afs/vnode.h | 10 fs/afs/volume.c | 78 - fs/afs/volume.h | 28 fs/autofs4/autofs_i.h | 3 fs/autofs4/init.c | 2 fs/autofs4/inode.c | 22 fs/autofs4/waitq.c | 1 fs/buffer.c | 2 fs/cachefiles/Makefile | 18 fs/cachefiles/cf-bind.c | 283 ++++ fs/cachefiles/cf-interface.c | 1315 +++++++++++++++++++ fs/cachefiles/cf-key.c | 160 ++ fs/cachefiles/cf-main.c | 131 ++ fs/cachefiles/cf-namei.c | 837 ++++++++++++ fs/cachefiles/cf-proc.c | 510 +++++++ fs/cachefiles/cf-sysctl.c | 69 + fs/cachefiles/cf-xattr.c | 299 ++++ fs/cachefiles/internal.h | 308 ++++ fs/dcache.c | 297 ++++ fs/ext2/dir.c | 6 fs/ext3/inode.c | 10 fs/fcntl.c | 2 fs/file_table.c | 49 + fs/freevxfs/vxfs_subr.c | 2 fs/fscache/Makefile | 11 fs/fscache/cookie.c | 1063 ++++++++++++++++ fs/fscache/fscache-int.h | 93 + fs/fscache/fsdef.c | 113 ++ fs/fscache/main.c | 105 ++ fs/fscache/page.c | 548 ++++++++ fs/nfs/Makefile | 7 fs/nfs/callback.c | 31 fs/nfs/callback.h | 7 fs/nfs/callback_proc.c | 13 fs/nfs/client.c | 1439 +++++++++++++++++++++ fs/nfs/delegation.c | 35 - fs/nfs/delegation.h | 10 fs/nfs/dir.c | 18 fs/nfs/file.c | 35 - fs/nfs/fscache.c | 349 +++++ fs/nfs/fscache.h | 466 +++++++ fs/nfs/getroot.c | 306 ++++ fs/nfs/idmap.c | 39 - fs/nfs/inode.c | 35 - fs/nfs/internal.h | 134 +- fs/nfs/namespace.c | 33 fs/nfs/nfs3proc.c | 8 fs/nfs/nfs4_fs.h | 78 - fs/nfs/nfs4namespace.c | 118 +- fs/nfs/nfs4proc.c | 165 +- fs/nfs/nfs4renewd.c | 19 fs/nfs/nfs4state.c | 174 --- fs/nfs/nfs4xdr.c | 42 - fs/nfs/pagelist.c | 3 fs/nfs/proc.c | 4 fs/nfs/read.c | 32 fs/nfs/super.c | 1400 +++++++------------- fs/nfs/sysctl.c | 43 + fs/nfs/write.c | 13 fs/open.c | 20 fs/reiserfs/inode.c | 10 fs/super.c | 12 fs/ufs/dir.c | 6 include/linux/dcache.h | 2 include/linux/file.h | 1 include/linux/fs.h | 10 include/linux/fscache-cache.h | 243 ++++ include/linux/fscache.h | 496 +++++++ include/linux/nfs4_mount.h | 1 include/linux/nfs_fs.h | 8 include/linux/nfs_fs_sb.h | 94 + include/linux/nfs_idmap.h | 14 include/linux/nfs_mount.h | 1 include/linux/nfs_xdr.h | 4 include/linux/page-flags.h | 15 include/linux/pagemap.h | 17 include/linux/sysctl.h | 1 kernel/auditsc.c | 2 kernel/sysctl.c | 11 mm/filemap.c | 120 ++ mm/migrate.c | 4 mm/page_alloc.c | 2 mm/readahead.c | 25 105 files changed, 13153 insertions(+), 1951 deletions(-) diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt new file mode 100644 index 0000000..1dd601b --- /dev/null +++ b/Documentation/filesystems/caching/backend-api.txt @@ -0,0 +1,357 @@ + ========================== + FS-CACHE CACHE BACKEND API + ========================== + +The FS-Cache system provides an API by which actual caches can be supplied to +FS-Cache for it to then serve out to network filesystems and other interested +parties. + +This API is declared in . 
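Before the details, here is a minimal, hypothetical sketch of the sequence a backend would follow, using only the calls documented in the sections below. The cache and ops names are invented, most of the mandatory operations are elided, and the header name is taken from this patch's diffstat; this is an orientation aid, not code from the patch.

	#include <linux/fscache-cache.h>

	static struct fscache_cache_ops mycache_ops = {
		.name = "mycache",
		/* lookup_object, grab_object, put_object, ... as described
		 * under "CACHE OPERATIONS" below */
	};

	static struct fscache_cache mycache;

	/* called from the backend's fill_super(); fsdef is the object that
	 * backs the FS-Cache master index in this cache */
	static int mycache_register(struct fscache_object *fsdef)
	{
		int ret;

		fscache_init_cache(&mycache, &mycache_ops, "mycache-%s", "dev0");

		ret = fscache_add_cache(&mycache, fsdef, NULL);
		if (ret < 0)
			return ret;	/* -ENOMEM or -EEXIST */
		return 0;
	}

The cache would later be withdrawn again with fscache_withdraw_cache(&mycache), as described further on.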
+ + +==================================== +INITIALISING AND REGISTERING A CACHE +==================================== + +To start off, a cache definition must be initialised and registered for each +cache the backend wants to make available. For instance, CacheFS does this in +the fill_super() operation on mounting. + +The cache definition (struct fscache_cache) should be initialised by calling: + + void fscache_init_cache(struct fscache_cache *cache, + struct fscache_cache_ops *ops, + const char *idfmt, + ...) + +Where: + + (*) "cache" is a pointer to the cache definition; + + (*) "ops" is a pointer to the table of operations that the backend supports on + this cache; + + (*) and a format and printf-style arguments for constructing a label for the + cache. + + +The cache should then be registered with FS-Cache by passing a pointer to the +previously initialised cache definition to: + + int fscache_add_cache(struct fscache_cache *cache, + struct fscache_object *fsdef, + const char *tagname); + +Two extra arguments should also be supplied: + + (*) "fsdef" which should point to the object representation for the FS-Cache + master index in this cache. Netfs primary index entries will be created + here. + + (*) "tagname" which, if given, should be a text string naming this cache. If + this is NULL, the identifier will be used instead. For CacheFS, the + identifier is set to name the underlying block device and the tag can be + supplied by mount. + +This function may return -ENOMEM if it ran out of memory or -EEXIST if the tag +is already in use. 0 will be returned on success. + + +===================== +UNREGISTERING A CACHE +===================== + +A cache can be withdrawn from the system by calling this function with a +pointer to the cache definition: + + void fscache_withdraw_cache(struct fscache_cache *cache) + +In CacheFS's case, this is called by put_super(). + + +================== +FS-CACHE UTILITIES +================== + +FS-Cache provides some utilities that a cache backend may make use of: + + (*) Find the parent of an object: + + struct fscache_object * + fscache_find_parent_object(struct fscache_object *object) + + This allows a backend to find the logical parent of an index or data file + in the cache hierarchy. + + (*) Note occurrence of an I/O error in a cache: + + void fscache_io_error(struct fscache_cache *cache) + + This tells FS-Cache that an I/O error occurred in the cache. After this + has been called, only resource dissociation operations (object and page + release) will be passed from the netfs to the cache backend for the + specified cache. + + This does not actually withdraw the cache. That must be done separately. + + (*) Get an extra reference to a read or write context: + + void *fscache_get_context(struct fscache_cookie *cookie, void *context) + + and release a reference: + + void *fscache_put_context(struct fscache_cookie *cookie, void *context) + + These should be used to maintain the presence of the read or write context + passed to the cache read/write functions. This context must then be + passed to the I/O completion function. + + +======================== +RELEVANT DATA STRUCTURES +======================== + + (*) Index/Data file FS-Cache representation cookie: + + struct fscache_cookie { + struct fscache_object_def *def; + struct fscache_netfs *netfs; + void *netfs_data; + ... + }; + + The fields that might be of use to the backend describe the object + definition, the netfs definition and the netfs's data for this cookie. 
+ The object definition contain functions supplied by the netfs for loading + and matching index entries; these are required to provide some of the + cache operations. + + (*) In-cache object representation: + + struct fscache_object { + struct fscache_cache *cache; + struct fscache_cookie *cookie; + unsigned long flags; + #define FSCACHE_OBJECT_RECYCLING 1 + ... + }; + + Structures of this type should be allocated by the cache backend and + passed to FS-Cache when requested by the appropriate cache operation. In + the case of CacheFS, they're embedded in CacheFS's internal object + structures. + + Each object contains a pointer to the cookie that represents the object it + is backing. It also contains a flag that indicates whether the object is + being retired when put_object() is called. This should be initialised by + calling fscache_object_init(object). + + +================ +CACHE OPERATIONS +================ + +The cache backend provides FS-Cache with a table of operations that can be +performed on the denizens of the cache. These are held in a structure of type: + + struct fscache_cache_ops + + (*) Name of cache provider [mandatory]: + + const char *name + + This isn't strictly an operation, but should be pointed at a string naming + the backend. + + (*) Object lookup [mandatory]: + + struct fscache_object *(*lookup_object)(struct fscache_cache *cache, + struct fscache_object *parent, + struct fscache_cookie *cookie) + + This method is used to look up an object in the specified cache, given a + pointer to the parent object and the cookie to which the object will be + attached. This should instantiate that object in the cache if it can, or + return -ENOBUFS or -ENOMEM if it can't. + + (*) Increment object refcount [mandatory]: + + struct fscache_object *(*grab_object)(struct fscache_object *object) + + This method is called to increment the reference count on an object. It + may fail (for instance if the cache is being withdrawn) by returning NULL. + It should return the object pointer if successful. + + (*) Lock/Unlock object [mandatory]: + + void (*lock_object)(struct fscache_object *object) + void (*unlock_object)(struct fscache_object *object) + + These methods are used to exclusively lock an object. It must be possible + to schedule with the lock held, so a spinlock isn't sufficient. + + (*) Pin/Unpin object [optional]: + + int (*pin_object)(struct fscache_object *object) + void (*unpin_object)(struct fscache_object *object) + + These methods are used to pin an object into the cache. Once pinned an + object cannot be reclaimed to make space. Return -ENOSPC if there's not + enough space in the cache to permit this. + + (*) Update object [mandatory]: + + int (*update_object)(struct fscache_object *object) + + This is called to update the index entry for the specified object. The + new information should be in object->cookie->netfs_data. This can be + obtained by calling object->cookie->def->get_aux()/get_attr(). + + (*) Release object reference [mandatory]: + + void (*put_object)(struct fscache_object *object) + + This method is used to discard a reference to an object. The object may + be destroyed when all the references held by FS-Cache are released. + + (*) Synchronise a cache [mandatory]: + + void (*sync)(struct fscache_cache *cache) + + This is called to ask the backend to synchronise a cache with its backing + device. 
+ + (*) Dissociate a cache [mandatory]: + + void (*dissociate_pages)(struct fscache_cache *cache) + + This is called to ask a cache to perform any page dissociations as part of + cache withdrawal. + + (*) Set the data size on a cache file [mandatory]: + + int (*set_i_size)(struct fscache_object *object, loff_t i_size); + + This is called to indicate to the cache the maximum size a file may reach. + The cache may use this to reserve space on the cache. It may also return + -ENOBUFS to indicate that insufficient space is available to expand the + metadata used to track the data. It should return 0 if successful or + -ENOMEM or -EIO on error. + + (*) Reserve cache space for an object's data [optional]: + + int (*reserve_space)(struct fscache_object *object, loff_t size); + + This is called to request that cache space be reserved to hold the data + for an object and the metadata used to track it. Zero size should be + taken as request to cancel a reservation. + + This should return 0 if successful, -ENOSPC if there isn't enough space + available, or -ENOMEM or -EIO on other errors. + + The reservation may exceed the size of the object, thus permitting future + expansion. If the amount of space consumed by an object would exceed the + reservation, it's permitted to refuse requests to allocate pages, but not + required. An object may be pruned down to its reservation size if larger + than that already. + + (*) Request page be read from cache [mandatory]: + + int (*read_or_alloc_page)(struct fscache_object *object, + struct page *page, + fscache_rw_complete_t end_io_func, + void *end_io_data, + gfp_t gfp) + + This is called to attempt to read a netfs page from the cache, or to + reserve a backing block if not. FS-Cache will have done as much checking + as it can before calling, but most of the work belongs to the backend. + + If there's no page in the cache, then -ENODATA should be returned if the + backend managed to reserve a backing block; -ENOBUFS, -ENOMEM or -EIO if + it didn't. + + If there is a page in the cache, then a read operation should be queued + and 0 returned. When the read finishes, end_io_func() should be called + with the following arguments: + + (*end_io_func)(object->cookie->netfs_data, + page, + end_io_data, + error); + + The mark_pages_cached() cookie operation should be called for the page if + any cache metadata is retained. This will indicate to the netfs that the + page needs explicit uncaching. This operation takes a pagevec, thus + allowing several pages to be marked at once. + + (*) Request pages be read from cache [mandatory]: + + int (*read_or_alloc_pages)(struct fscache_object *object, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t end_io_func, + void *end_io_data, + gfp_t gfp) + + This is like the previous operation, except it will be handed a list of + pages instead of one page. Any pages on which a read operation is started + must be added to the page cache for the specified mapping and also to the + LRU. Such pages must also be removed from the pages list and nr_pages + decremented per page. + + If there was an error such as -ENOMEM, then that should be returned; else + if one or more pages couldn't be read or allocated, then -ENOBUFS should + be returned; else if one or more pages couldn't be read, then -ENODATA + should be returned. If all the pages are dispatched then 0 should be + returned. 
+ + (*) Request page be allocated in the cache [mandatory]: + + int (*allocate_page)(struct fscache_object *object, + struct page *page, + gfp_t gfp) + + This is like read_or_alloc_page(), except that it shouldn't read from the + cache, even if there's data there that could be retrieved. It should, + however, set up any internal metadata required such that write_page() can + write to the cache. + + If there's no backing block available, then -ENOBUFS should be returned + (or -ENOMEM or -EIO if there were other problems). If a block is + successfully allocated, then the netfs page should be marked and 0 + returned. + + (*) Request page be written to cache [mandatory]: + + int (*write_page)(struct fscache_object *object, + struct page *page, + fscache_rw_complete_t end_io_func, + void *end_io_data, + gfp_t gfp) + + This is called to write from a page on which there was a previously + successful read_or_alloc_page() call. FS-Cache filters out pages that + don't have mappings. + + If there's no backing block available, then -ENOBUFS should be returned + (or -ENOMEM or -EIO if there were other problems). + + If the write operation could be queued, then 0 should be returned. When + the write completes, end_io_func() should be called with the following + arguments: + + (*end_io_func)(object->cookie->netfs_data, + page, + end_io_data, + error); + + (*) Discard retained per-page metadata [mandatory]: + + void (*uncache_pages)(struct fscache_object *object, + struct pagevec *pagevec) + + This is called when one or more netfs pages are being evicted from the + pagecache. The cache backend should tear down any internal representation + or tracking it maintains. diff --git a/Documentation/filesystems/caching/cachefiles.txt b/Documentation/filesystems/caching/cachefiles.txt new file mode 100644 index 0000000..37b6385 --- /dev/null +++ b/Documentation/filesystems/caching/cachefiles.txt @@ -0,0 +1,281 @@ + =============================================== + CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM + =============================================== + +Contents: + + (*) Overview. + + (*) Requirements. + + (*) Configuration. + + (*) Starting the cache. + + (*) Things to avoid. + + +======== +OVERVIEW +======== + +CacheFiles is a caching backend that's meant to use as a cache a directory on +an already mounted filesystem of a local type (such as Ext3). + +CacheFiles uses a userspace daemon to do some of the cache management - such as +reaping stale nodes and culling. This is called cachefilesd and lives in +/sbin. + +The filesystem and data integrity of the cache are only as good as those of the +filesystem providing the backing services. Note that CacheFiles does not +attempt to journal anything since the journalling interfaces of the various +filesystems are very specific in nature. + +CacheFiles creates a proc-file - "/proc/fs/cachefiles" - that is used for +communication with the daemon. Only one thing may have this open at once, and +whilst it is open, a cache is at least partially in existence. The daemon +opens this and sends commands down it to control the cache. + +CacheFiles is currently limited to a single cache. + +CacheFiles attempts to maintain at least a certain percentage of free space on +the filesystem, shrinking the cache by culling the objects it contains to make +space if necessary - see the "Cache Culling" section. 
This means it can be +placed on the same medium as a live set of data, and will expand to make use of +spare space and automatically contract when the set of data requires more +space. + + +============ +REQUIREMENTS +============ + +The use of CacheFiles and its daemon requires the following features to be +available in the system and in the cache filesystem: + + - dnotify. + + - extended attributes (xattrs). + + - openat() and friends. + + - bmap() support on files in the filesystem (FIBMAP ioctl). + + - The use of bmap() to detect a partial page at the end of the file. + +It is strongly recommended that the "dir_index" option is enabled on Ext3 +filesystems being used as a cache. + + +============= +CONFIGURATION +============= + +The cache is configured by a script in /etc/cachefilesd.conf. These commands +set up cache ready for use. The following script commands are available: + + (*) brun % + (*) bcull % + (*) bstop % + + Configure the culling limits. Optional. See the section on culling + The defaults are 7%, 5% and 1% respectively. + + (*) dir + + Specify the directory containing the root of the cache. Mandatory. + + (*) tag + + Specify a tag to FS-Cache to use in distinguishing multiple caches. + Optional. The default is "CacheFiles". + + (*) debug + + Specify a numeric bitmask to control debugging in the kernel module. + Optional. The default is zero (all off). The following values can be + OR'd into the mask to collect various information: + + 1 Turn on trace of function entry (_enter() macros) + 2 Turn on trace of function exit (_leave() macros) + 4 Turn on trace of internal debug points (_debug()) + + This mask can also be set through /proc/sys/fs/cachefiles/debug. + + +================== +STARTING THE CACHE +================== + +The cache is started by running the daemon. The daemon opens the cache proc +file, configures the cache and tells it to begin caching. At that point the +cache binds to fscache and the cache becomes live. + +The daemon is run as follows: + + /sbin/cachefilesd [-d]* [-s] [-n] [-f ] + +The flags are: + + (*) -d + + Increase the debugging level. This can be specified multiple times and + is cumulative with itself. + + (*) -s + + Send messages to stderr instead of syslog. + + (*) -n + + Don't daemonise and go into background. + + (*) -f + + Use an alternative configuration file rather than the default one. + + +=============== +THINGS TO AVOID +=============== + +Do not mount other things within the cache as this will cause problems. The +kernel module contains its own very cut-down path walking facility that ignores +mountpoints, but the daemon can't avoid them. + +Do not create, rename or unlink files and directories in the cache whilst the +cache is active, as this may cause the state to become uncertain. + +Renaming files in the cache might make objects appear to be other objects (the +filename is part of the lookup key). + +Do not change or remove the extended attributes attached to cache files by the +cache as this will cause the cache state management to get confused. + +Do not create files or directories in the cache, lest the cache get confused or +serve incorrect data. + +Do not chmod files in the cache. The module creates things with minimal +permissions to prevent random users being able to access them directly. + + +============= +CACHE CULLING +============= + +The cache may need culling occasionally to make space. This involves +discarding objects from the cache that have been used less recently than +anything else. 
Culling is based on the access time of data objects. Empty +directories are culled if not in use. + +Cache culling is done on the basis of the percentage of blocks available in the +underlying filesystem. There are three "limits": + + (*) brun + + If the amount of available space in the cache rises above this limit, then + culling is turned off. + + (*) bcull + + If the amount of available space in the cache falls below this limit, then + culling is started. + + (*) bstop + + If the amount of available space in the cache falls below this limit, then + no further allocation of disk space is permitted until culling has raised + the amount above this limit again. + +These must be configured thusly: + + 0 <= bstop < bcull < brun < 100 + +Note that these are percentages of available space, and do _not_ appear as 100 +minus the percentage displayed by the "df" program. + +The userspace daemon scans the cache to build up a table of cullable objects. +These are then culled in least recently used order. A new scan of the cache is +started as soon as space is made in the table. Objects will be skipped if +their atimes have changed or if the kernel module says it is still using them. + + +=============== +CACHE STRUCTURE +=============== + +The CacheFiles module will create two directories in the directory it was +given: + + (*) cache/ + + (*) graveyard/ + +The active cache objects all reside in the first directory. The CacheFiles +kernel module moves any retired or culled objects that it can't simply unlink +to the graveyard from which the daemon will actually delete them. + +The daemon uses dnotify to monitor the graveyard directory, and will delete +anything that appears therein. + + +The module represents index objects as directories with the filename "I..." or +"J...". Note that the "cache/" directory is itself a special index. + +Data objects are represented as files if they have no children, or directories +if they do. Their filenames all begin "D..." or "E...". If represented as a +directory, data objects will have a file in the directory called "data" that +actually holds the data. + +Special objects are similar to data objects, except their filenames begin +"S..." or "T...". + + +If an object has children, then it will be represented as a directory. +Immediately in the representative directory are a collection of directories +named for hash values of the child object keys with an '@' prepended. Into +this directory, if possible, will be placed the representations of the child +objects: + + INDEX INDEX INDEX DATA FILES + ========= ========== ================================= ================ + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400 + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry + + +If the key is so long that it exceeds NAME_MAX with the decorations added on to +it, then it will be cut into pieces, the first few of which will be used to +make a nest of directories, and the last one of which will be the objects +inside the last directory. The names of the intermediate directories will have +'+' prepended: + + J1223/@23/+xy...z/+kl...m/Epqr + + +Note that keys are raw data, and not only may they exceed NAME_MAX in size, +they may also contain things like '/' and NUL characters, and so they may not +be suitable for turning directly into a filename. 
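The next paragraph describes how CacheFiles deals with this; roughly speaking, it must decide whether a raw key can be used directly as a filename component or must be encoded first. A hypothetical sketch of that check follows (the function name is invented and the real logic in fs/cachefiles/cf-key.c differs in detail):

	#include <linux/types.h>

	/* Illustrative only: a raw key byte string can be used directly as a
	 * filename component only if every byte is a graphic ASCII character
	 * and none of them is '/'; otherwise it must be encoded. */
	static bool cachefiles_key_is_printable(const u8 *key, size_t len)
	{
		size_t i;

		for (i = 0; i < len; i++)
			if (key[i] < 0x21 || key[i] > 0x7e || key[i] == '/')
				return false;
		return true;
	}

Whether a key was stored in printable or encoded form is what the paired filename prefixes in the table below distinguish.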
+ +To handle this, CacheFiles will use a suitably printable filename directly and +"base-64" encode ones that aren't directly suitable. The two versions of +object filenames indicate the encoding: + + OBJECT TYPE PRINTABLE ENCODED + =============== =============== =============== + Index "I..." "J..." + Data "D..." "E..." + Special "S..." "T..." + +Intermediate directories are always "@" or "+" as appropriate. + + +Each object in the cache has an extended attribute label that holds the object +type ID (required to distinguish special objects) and the auxiliary data from +the netfs. The latter is used to detect stale objects in the cache and update +or retire them. + + +Note that CacheFiles will erase from the cache any file it doesn't recognise or +any file of an incorrect type (such as a FIFO file or a device file). diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt new file mode 100644 index 0000000..82c3168 --- /dev/null +++ b/Documentation/filesystems/caching/fscache.txt @@ -0,0 +1,151 @@ + ========================== + General Filesystem Caching + ========================== + +======== +OVERVIEW +======== + +This facility is a general purpose cache for network filesystems, though it +could be used for caching other things such as ISO9660 filesystems too. + +FS-Cache mediates between cache backends (such as CacheFS) and network +filesystems: + + +---------+ + | | +--------------+ + | NFS |--+ | | + | | | +-->| CacheFS | + +---------+ | +----------+ | | /dev/hda5 | + | | | | +--------------+ + +---------+ +-->| | | + | | | |--+ + | AFS |----->| FS-Cache | + | | | |--+ + +---------+ +-->| | | + | | | | +--------------+ + +---------+ | +----------+ | | | + | | | +-->| CacheFiles | + | ISOFS |--+ | /var/cache | + | | +--------------+ + +---------+ + + +FS-Cache does not follow the idea of completely loading every netfs file +opened in its entirety into a cache before permitting it to be accessed and +then serving the pages out of that cache rather than the netfs inode because: + + (1) It must be practical to operate without a cache. + + (2) The size of any accessible file must not be limited to the size of the + cache. + + (3) The combined size of all opened files (this includes mapped libraries) + must not be limited to the size of the cache. + + (4) The user should not be forced to download an entire file just to do a + one-off access of a small portion of it (such as might be done with the + "file" program). + +It instead serves the cache out in PAGE_SIZE chunks as and when requested by +the netfs('s) using it. + + +FS-Cache provides the following facilities: + + (1) More than one cache can be used at once. Caches can be selected + explicitly by use of tags. + + (2) Caches can be added / removed at any time. + + (3) The netfs is provided with an interface that allows either party to + withdraw caching facilities from a file (required for (2)). + + (4) The interface to the netfs returns as few errors as possible, preferring + rather to let the netfs remain oblivious. + + (5) Cookies are used to represent indices, files and other objects to the + netfs. The simplest cookie is just a NULL pointer - indicating nothing + cached there. + + (6) The netfs is allowed to propose - dynamically - any index hierarchy it + desires, though it must be aware that the index search function is + recursive, stack space is limited, and indices can only be children of + indices. + + (7) Data I/O is done direct to and from the netfs's pages. 
The netfs + indicates that page A is at index B of the data-file represented by cookie + C, and that it should be read or written. The cache backend may or may + not start I/O on that page, but if it does, a netfs callback will be + invoked to indicate completion. The I/O may be either synchronous or + asynchronous. + + (8) Cookies can be "retired" upon release. At this point FS-Cache will mark + them as obsolete and the index hierarchy rooted at that point will get + recycled. + + (9) The netfs provides a "match" function for index searches. In addition to + saying whether a match was made or not, this can also specify that an + entry should be updated or deleted. + + +FS-Cache maintains a virtual indexing tree in which all indices, files, objects +and pages are kept. Bits of this tree may actually reside in one or more +caches. + + FSDEF + | + +------------------------------------+ + | | + NFS AFS + | | + +--------------------------+ +-----------+ + | | | | + homedir mirror afs.org redhat.com + | | | + +------------+ +---------------+ +----------+ + | | | | | | + 00001 00002 00007 00125 vol00001 vol00002 + | | | | | + +---+---+ +-----+ +---+ +------+------+ +-----+----+ + | | | | | | | | | | | | | +PG0 PG1 PG2 PG0 XATTR PG0 PG1 DIRENT DIRENT DIRENT R/W R/O Bak + | | + PG0 +-------+ + | | + 00001 00003 + | + +---+---+ + | | | + PG0 PG1 PG2 + +In the example above, you can see two netfs's being backed: NFS and AFS. These +have different index hierarchies: + + (*) The NFS primary index contains per-server indices. Each server index is + indexed by NFS file handles to get data file objects. Each data file + objects can have an array of pages, but may also have further child + objects, such as extended attributes and directory entries. Extended + attribute objects themselves have page-array contents. + + (*) The AFS primary index contains per-cell indices. Each cell index contains + per-logical-volume indices. Each of volume index contains up to three + indices for the read-write, read-only and backup mirrors of those volumes. + Each of these contains vnode data file objects, each of which contains an + array of pages. + +The very top index is the FS-Cache master index in which individual netfs's +have entries. + +Any index object may reside in more than one cache, provided it only has index +children. Any index with non-index object children will be assumed to only +reside in one cache. + + +The netfs API to FS-Cache can be found in: + + Documentation/filesystems/caching/netfs-api.txt + +The cache backend API to FS-Cache can be found in: + + Documentation/filesystems/caching/backend-api.txt diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt new file mode 100644 index 0000000..a1a182b --- /dev/null +++ b/Documentation/filesystems/caching/netfs-api.txt @@ -0,0 +1,752 @@ + =============================== + FS-CACHE NETWORK FILESYSTEM API + =============================== + +There's an API by which a network filesystem can make use of the FS-Cache +facilities. This is based around a number of principles: + + (1) Caches can store a number of different object types. There are two main + object types: indices and files. The first is a special type used by + FS-Cache to make finding objects faster and to make retiring of groups of + objects easier. + + (2) Every index, file or other object is represented by a cookie. This cookie + may or may not have anything associated with it, but the netfs doesn't + need to care. 
+ + (3) Barring the top-level index (one entry per cached netfs), the index + hierarchy for each netfs is structured according the whim of the netfs. + +This API is declared in . + +This document contains the following sections: + + (1) Network filesystem definition + (2) Index definition + (3) Object definition + (4) Network filesystem (un)registration + (5) Cache tag lookup + (6) Index registration + (7) Data file registration + (8) Miscellaneous object registration + (9) Setting the data file size + (10) Page alloc/read/write + (11) Page uncaching + (12) Index and data file update + (13) Miscellaneous cookie operations + (14) Cookie unregistration + (15) Index and data file invalidation + + +============================= +NETWORK FILESYSTEM DEFINITION +============================= + +FS-Cache needs a description of the network filesystem. This is specified +using a record of the following structure: + + struct fscache_netfs { + uint32_t version; + const char *name; + struct fscache_netfs_operations *ops; + struct fscache_cookie *primary_index; + ... + }; + +This first three fields should be filled in before registration, and the fourth +will be filled in by the registration function; any other fields should just be +ignored and are for internal use only. + +The fields are: + + (1) The name of the netfs (used as the key in the toplevel index). + + (2) The version of the netfs (if the name matches but the version doesn't, the + entire in-cache hierarchy for this netfs will be scrapped and begun + afresh). + + (3) The operations table is defined as follows: + + struct fscache_netfs_operations { + }; + + Currently there aren't any functions here. + + (4) The cookie representing the primary index will be allocated according to + another parameter passed into the registration function. + +For example, kAFS (linux/fs/afs/) uses the following definitions to describe +itself: + + static struct fscache_netfs_operations afs_cache_ops = { + }; + + struct fscache_netfs afs_cache_netfs = { + .version = 0, + .name = "afs", + .ops = &afs_cache_ops, + }; + + +================ +INDEX DEFINITION +================ + +Indices are used for two purposes: + + (1) To aid the finding of a file based on a series of keys (such as AFS's + "cell", "volume ID", "vnode ID"). + + (2) To make it easier to discard a subset of all the files cached based around + a particular key - for instance to mirror the removal of an AFS volume. + +However, since it's unlikely that any two netfs's are going to want to define +their index hierarchies in quite the same way, FS-Cache tries to impose as few +restraints as possible on how an index is structured and where it is placed in +the tree. The netfs can even mix indices and data files at the same level, but +it's not recommended. + +Each index entry consists of a key of indeterminate length plus some auxilliary +data, also of indeterminate length. + +There are some limits on indices: + + (1) Any index containing non-index objects should be restricted to a single + cache. Any such objects created within an index will be created in the + first cache only. The cache in which an index is created can be + controlled by cache tags (see below). + + (2) The entry data must be atomically journallable, so it is limited to about + 400 bytes at present. At least 400 bytes will be available. + + (3) The depth of the index tree should be judged with care as the search + function is recursive. Too many layers will run the kernel out of stack. 
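As a concrete example of supplying such a key, the AFS cell index added later in this patch keys each entry on the cell name: its get_key operation (lightly abridged from the fs/afs/cell.c hunk further down, and described in the next section) simply copies the name into the buffer it is given, returning the length used or 0 if the name will not fit:

	static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
					       void *buffer, uint16_t bufmax)
	{
		const struct afs_cell *cell = cookie_netfs_data;
		uint16_t klen;

		klen = strlen(cell->name);
		if (klen > bufmax)
			return 0;

		memcpy(buffer, cell->name, klen);
		return klen;
	}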
+ + +================= +OBJECT DEFINITION +================= + +To define an object, a structure of the following type should be filled out: + + struct fscache_object_def + { + uint8_t name[16]; + uint8_t type; + + struct fscache_cache_tag *(*select_cache)( + const void *parent_netfs_data, + const void *cookie_netfs_data); + + uint16_t (*get_key)(const void *cookie_netfs_data, + void *buffer, + uint16_t bufmax); + + void (*get_attr)(const void *cookie_netfs_data, + uint64_t *size); + + uint16_t (*get_aux)(const void *cookie_netfs_data, + void *buffer, + uint16_t bufmax); + + fscache_checkaux_t (*check_aux)(void *cookie_netfs_data, + const void *data, + uint16_t datalen); + + void (*get_context)(void *cookie_netfs_data, void *context); + + void (*put_context)(void *cookie_netfs_data, void *context); + + void (*mark_pages_cached)(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec); + + void (*now_uncached)(void *cookie_netfs_data); + }; + +This has the following fields: + + (1) The type of the object [mandatory]. + + This is one of the following values: + + (*) FSCACHE_COOKIE_TYPE_INDEX + + This defines an index, which is a special FS-Cache type. + + (*) FSCACHE_COOKIE_TYPE_DATAFILE + + This defines an ordinary data file. + + (*) Any other value between 2 and 255 + + This defines an extraordinary object such as an XATTR. + + (2) The name of the object type (NUL terminated unless all 16 chars are used) + [optional]. + + (3) A function to select the cache in which to store an index [optional]. + + This function is invoked when an index needs to be instantiated in a cache + during the instantiation of a non-index object. Only the immediate index + parent for the non-index object will be queried. Any indices above that + in the hierarchy may be stored in multiple caches. This function does not + need to be supplied for any non-index object or any index that will only + have index children. + + If this function is not supplied or if it returns NULL then the first + cache in the parent's list will be chosed, or failing that, the first + cache in the master list. + + (4) A function to retrieve an object's key from the netfs [mandatory]. + + This function will be called with the netfs data that was passed to the + cookie acquisition function and the maximum length of key data that it may + provide. It should write the required key data into the given buffer and + return the quantity it wrote. + + (5) A function to retrieve attribute data from the netfs [optional]. + + This function will be called with the netfs data that was passed to the + cookie acquisition function. It should return the size of the file if + this is a data file. The size may be used to govern how much cache must + be reserved for this file in the cache. + + If the function is absent, a file size of 0 is assumed. + + (6) A function to retrieve auxilliary data from the netfs [optional]. + + This function will be called with the netfs data that was passed to the + cookie acquisition function and the maximum length of auxilliary data that + it may provide. It should write the auxilliary data into the given buffer + and return the quantity it wrote. + + If this function is absent, the auxilliary data length will be set to 0. + + The length of the auxilliary data buffer may be dependent on the key + length. A netfs mustn't rely on being able to provide more than 400 bytes + for both. + + (7) A function to check the auxilliary data [optional]. 
+ + This function will be called to check that a match found in the cache for + this object is valid. For instance with AFS it could check the auxilliary + data against the data version number returned by the server to determine + whether the index entry in a cache is still valid. + + If this function is absent, it will be assumed that matching objects in a + cache are always valid. + + If present, the function should return one of the following values: + + (*) FSCACHE_CHECKAUX_OKAY - the entry is okay as is + (*) FSCACHE_CHECKAUX_NEEDS_UPDATE - the entry requires update + (*) FSCACHE_CHECKAUX_OBSOLETE - the entry should be deleted + + This function can also be used to extract data from the auxilliary data in + the cache and copy it into the netfs's structures. + + (8) A pair of functions to manage contexts for the completion callback + [optional]. + + The cache read/write functions are passed a context which is then passed + to the I/O completion callback function. To ensure this context remains + valid until after the I/O completion is called, two functions may be + provided: one to get an extra reference on the context, and one to drop a + reference to it. + + If the context is not used or is a type of object that won't go out of + scope, then these functions are not required. These functions are not + required for indices as indices may not contain data. These functions may + be called in interrupt context and so may not sleep. + + (9) A function to mark a page as retaining cache metadata [mandatory]. + + This is called by the cache to indicate that it is retaining in-memory + information for this page and that the netfs should uncache the page when + it has finished. This does not indicate whether there's data on the disk + or not. Note that several pages at once may be presented for marking. + + kAFS and NFS use the PG_private bit on the page structure for this, but + that may not be appropriate in all cases. + + This function is not required for indices as they're not permitted data. + +(10) A function to unmark all the pages retaining cache metadata [mandatory]. + + This is called by FS-Cache to indicate that a backing store is being + unbound from a cookie and that all the marks on the pages should be + cleared to prevent confusion. Note that the cache will have torn down all + its tracking information so that the pages don't need to be explicitly + uncached. + + This function is not required for indices as they're not permitted data. + + +=================================== +NETWORK FILESYSTEM (UN)REGISTRATION +=================================== + +The first step is to declare the network filesystem to the cache. This also +involves specifying the layout of the primary index (for AFS, this would be the +"cell" level). + +The registration function is: + + int fscache_register_netfs(struct fscache_netfs *netfs); + +It just takes a pointer to the netfs definition. It returns 0 or an error as +appropriate. + +For kAFS, registration is done as follows: + + ret = fscache_register_netfs(&afs_cache_netfs); + +The last step is, of course, unregistration: + + void fscache_unregister_netfs(struct fscache_netfs *netfs); + + +================ +CACHE TAG LOOKUP +================ + +FS-Cache permits the use of more than one cache. To permit particular index +subtrees to be bound to particular caches, the second step is to look up cache +representation tags. This step is optional; it can be left entirely up to +FS-Cache as to which cache should be used. 
The problem with doing that is that +FS-Cache will always pick the first cache that was registered. + +To get the representation for a named tag: + + struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name); + +This takes a text string as the name and returns a representation of a tag. It +will never return an error. It may return a dummy tag, however, if it runs out +of memory; this will inhibit caching with this tag. + +Any representation so obtained must be released by passing it to this function: + + void fscache_release_cache_tag(struct fscache_cache_tag *tag); + +The tag will be retrieved by FS-Cache when it calls the object definition +operation select_cache(). + + +================== +INDEX REGISTRATION +================== + +The third step is to inform FS-Cache about part of an index hierarchy that can +be used to locate files. This is done by requesting a cookie for each index in +the path to the file: + + struct fscache_cookie * + fscache_acquire_cookie(struct fscache_cookie *parent, + struct fscache_object_def *def, + void *netfs_data); + +This function creates an index entry in the index represented by parent, +filling in the index entry by calling the operations pointed to by def. + +Note that this function never returns an error - all errors are handled +internally. It may also return NULL to indicate no cookie. It is quite +acceptable to pass this token back to this function as the parent to another +acquisition (or even to the relinquish cookie, read page and write page +functions - see below). + +Note also that no indices are actually created in a cache until a non-index +object needs to be created somewhere down the hierarchy. Furthermore, an index +may be created in several different caches independently at different times. +This is all handled transparently, and the netfs doesn't see any of it. + +For example, with AFS, a cell would be added to the primary index. This index +entry would have a dependent inode containing a volume location index for the +volume mappings within this cell: + + cell->cache = + fscache_acquire_cookie(afs_cache_netfs.primary_index, + &afs_cell_cache_index_def, + cell); + +Then when a volume location was accessed, it would be entered into the cell's +index and an inode would be allocated that acts as a volume type and hash chain +combination: + + vlocation->cache = + fscache_acquire_cookie(cell->cache, + &afs_vlocation_cache_index_def, + vlocation); + +And then a particular flavour of volume (R/O for example) could be added to +that index, creating another index for vnodes (AFS inode equivalents): + + volume->cache = + fscache_acquire_cookie(vlocation->cache, + &afs_volume_cache_index_def, + volume); + + +====================== +DATA FILE REGISTRATION +====================== + +The fourth step is to request a data file be created in the cache. This is +identical to index cookie acquisition. The only difference is that the type in +the object definition should be something other than index type. + + vnode->cache = + fscache_acquire_cookie(volume->cache, + &afs_vnode_cache_object_def, + vnode); + + +================================= +MISCELLANEOUS OBJECT REGISTRATION +================================= + +An optional step is to request an object of miscellaneous type be created in +the cache. This is almost identical to index cookie acquisition. The only +difference is that the type in the object definition should be something other +than index type. 
Whilst the parent object could be an index, it's more likely +it would be some other type of object such as a data file. + + xattr->cache = + fscache_acquire_cookie(vnode->cache, + &afs_xattr_cache_object_def, + xattr); + +Miscellaneous objects might be used to store extended attributes or directory +entries for example. + + +========================== +SETTING THE DATA FILE SIZE +========================== + +The fifth step is to set the size of the file. This doesn't automatically +reserve any space in the cache, but permits the cache to adjust its metadata +for data tracking appropriately: + + int fscache_set_i_size(struct fscache_cookie *cookie, loff_t i_size); + +The cache will return -ENOBUFS if there is no backing cache or if there is no +space to allocate any extra metadata required in the cache. + +Note that attempts to read or write data pages in the cache over this size may +be rebuffed with -ENOBUFS. + + +===================== +PAGE READ/ALLOC/WRITE +===================== + +And the sixth step is to store and retrieve pages in the cache. There are +three functions that are used to do this. + +Note: + + (1) A page should not be re-read or re-allocated without uncaching it first. + + (2) A read or allocated page must be uncached when the netfs page is released + from the pagecache. + + (3) A page should only be written to the cache if previous read or allocated. + +This permits the cache to maintain its page tracking in proper order. + + +PAGE READ +--------- + +Firstly, the netfs should ask FS-Cache to examine the caches and read the +contents cached for a particular page of a particular file if present, or else +allocate space to store the contents if not: + + typedef + void (*fscache_rw_complete_t)(struct page *page, + void *context, + int error); + + int fscache_read_or_alloc_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *end_io_data, + gfp_t gfp); + +The cookie argument must specify a cookie for an object that isn't an index, +the page specified will have the data loaded into it (and is also used to +specify the page number), and the gfp argument is used to control how any +memory allocations made are satisfied. + +If the cookie indicates the inode is not cached: + + (1) The function will return -ENOBUFS. + +Else if there's a copy of the page resident in the cache: + + (1) The mark_pages_cached() cookie operation will be called on that page. + + (2) The function will submit a request to read the data from the cache's + backing device directly into the page specified. + + (3) The function will return 0. + + (4) When the read is complete, end_io_func() will be invoked with: + + (*) The netfs data supplied when the cookie was created. + + (*) The page descriptor. + + (*) The context argument passed to the above function. This will be + maintained with the get_context/put_context functions mentioned above. + + (*) An argument that's 0 on success or negative for an error code. + + If an error occurs, it should be assumed that the page contains no usable + data. + + end_io_func() will be called in process context if the read is results in + an error, but it might be called in interrupt context if the read is + successful. + +Otherwise, if there's not a copy available in cache, but the cache may be able +to store the page: + + (1) The mark_pages_cached() cookie operation will be called on that page. + + (2) A block may be reserved in the cache and attached to the object at the + appropriate place. 
+ + (3) The function will return -ENODATA. + +This function may also return -ENOMEM or -EINTR, in which case it won't have +read any data from the cache. + + +PAGE ALLOCATE +------------- + +Alternatively, if there's not expected to be any data in the cache for a page +because the file has been extended, a block can simply be allocated instead: + + int fscache_alloc_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp); + +This is similar to the fscache_read_or_alloc_page() function, except that it +never reads from the cache. It will return 0 if a block has been allocated, +rather than -ENODATA as the other would. One or the other must be performed +before writing to the cache. + +The mark_pages_cached() cookie operation will be called on the page if +successful. + + +PAGE WRITE +---------- + +Secondly, if the netfs changes the contents of the page (either due to an +initial download or if a user performs a write), then the page should be +written back to the cache: + + int fscache_write_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); + +The cookie argument must specify a data file cookie, the page specified should +contain the data to be written (and is also used to specify the page number), +and the gfp argument is used to control how any memory allocations made are +satisfied. + +The page must have first been read or allocated successfully and must not have +been uncached before writing is performed. + +If the cookie indicates the inode is not cached then: + + (1) The function will return -ENOBUFS. + +Else if space can be allocated in the cache to hold this page: + + (1) The function will submit a request to write the data to cache's backing + device directly from the page specified. + + (2) The function will return 0. + + (3) When the write is complete the end_io_func() will be invoked with: + + (*) The netfs data supplied when the cookie was created. + + (*) The page descriptor. + + (*) The context argument passed to the function. This will be maintained + with the get_context/put_context functions mentioned above. + + (*) An argument that's 0 on success or negative for an error. + + If an error occurs, it can be assumed that the page has not been written + to the cache, and that either there's a block containing the old data or + no block at all in the cache. + + end_io_func() might be called in interrupt context. + +Else if there's no space available in the cache, -ENOBUFS will be returned. + + +MULTIPLE PAGE READ +------------------ + +A facility is provided to read several pages at once, as requested by the +readpages() address space operation: + + int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, + struct address_space *mapping, + struct list_head *pages, + int *nr_pages, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); + +This works in a similar way to fscache_read_or_alloc_page(), except: + + (1) Any page it can retrieve data for is removed from pages and nr_pages and + dispatched for reading to the disk. Reads of adjacent pages on disk may + be merged for greater efficiency. + + (2) The mark_pages_cached() cookie operation will be called on several pages + at once if they're being read or allocated. + + (3) If there was an general error, then that error will be returned. + + Else if some pages couldn't be allocated or read, then -ENOBUFS will be + returned. + + Else if some pages couldn't be read but were allocated, then -ENODATA will + be returned. 
+ + Otherwise, if all pages had reads dispatched, then 0 will be returned, the + list will be empty and *nr_pages will be 0. + + (4) end_io_func will be called once for each page being read as the reads + complete. It will be called in process context if error != 0, but it may + be called in interrupt context if there is no error. + +Note that a return of -ENODATA, -ENOBUFS or any other error does not preclude +some of the pages being read and some being allocated. Those pages will have +been marked appropriately and will need uncaching. + + +============== +PAGE UNCACHING +============== + +To uncache a page, this function should be called: + + void fscache_uncache_page(struct fscache_cookie *cookie, + struct page *page); + +This function permits the cache to release any in-memory representation it +might be holding for this netfs page. This function must be called once for +each page on which the read or write page functions above have been called to +make sure the cache's in-memory tracking information gets torn down. + +Note that pages can't be explicitly deleted from the a data file. The whole +data file must be retired (see the relinquish cookie function below). + +Furthermore, note that this does not cancel the asynchronous read or write +operation started by the read/alloc and write functions. + +There is another unbinding operation similar to the above that takes a set of +pages to unbind in one go: + + void fscache_uncache_pagevec(struct fscache_cookie *cookie, + struct pagevec *pagevec); + + +========================== +INDEX AND DATA FILE UPDATE +========================== + +To request an update of the index data for an index or other object, the +following function should be called: + + void fscache_update_cookie(struct fscache_cookie *cookie); + +This function will refer back to the netfs_data pointer stored in the cookie by +the acquisition function to obtain the data to write into each revised index +entry. The update method in the parent index definition will be called to +transfer the data. + +Note that partial updates may happen automatically at other times, such as when +data blocks are added to a data file object. + + +=============================== +MISCELLANEOUS COOKIE OPERATIONS +=============================== + +There are a number of operations that can be used to control cookies: + + (*) Cookie pinning: + + int fscache_pin_cookie(struct fscache_cookie *cookie); + void fscache_unpin_cookie(struct fscache_cookie *cookie); + + These operations permit data cookies to be pinned into the cache and to + have the pinning removed. They are not permitted on index cookies. + + The pinning function will return 0 if successful, -ENOBUFS in the cookie + isn't backed by a cache, -EOPNOTSUPP if the cache doesn't support pinning, + -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or + -EIO if there's any other problem. + + (*) Data space reservation: + + int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size); + + This permits a netfs to request cache space be reserved to store up to the + given amount of a file. It is permitted to ask for more than the current + size of the file to allow for future file expansion. + + If size is given as zero then the reservation will be cancelled. + + The function will return 0 if successful, -ENOBUFS in the cookie isn't + backed by a cache, -EOPNOTSUPP if the cache doesn't support reservations, + -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or + -EIO if there's any other problem. 
+ + Note that this doesn't pin an object in a cache; it can still be culled to + make space if it's not in use. + + +===================== +COOKIE UNREGISTRATION +===================== + +To get rid of a cookie, this function should be called. + + void fscache_relinquish_cookie(struct fscache_cookie *cookie, + int retire); + +If retire is non-zero, then the object will be marked for recycling, and all +copies of it will be removed from all active caches in which it is present. +Not only that but all child objects will also be retired. + +If retire is zero, then the object may be available again when next the +acquisition function is called. Retirement here will overrule the pinning on a +cookie. + +One very important note - relinquish must NOT be called for a cookie unless all +the cookies for "child" indices, objects and pages have been relinquished +first. + + +================================ +INDEX AND DATA FILE INVALIDATION +================================ + +There is no direct way to invalidate an index subtree or a data file. To do +this, the caller should relinquish and retire the cookie they have, and then +acquire a new one. diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 0b62c62..ead15f0 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -71,7 +71,7 @@ you might want to raise the limit. ============================================================== -file-max & file-nr: +file-max, file-nr & file-kernel: The kernel allocates file handles dynamically, but as yet it doesn't free them again. @@ -88,6 +88,10 @@ close to the maximum, but the number of significantly greater than 0, you've encountered a peak in your usage of file handles and you don't need to increase the maximum. +The value in file-kernel denotes the number of internal file handles +that the kernel has open. These do not contribute to ENFILE +accounting. + ============================================================== inode-max, inode-nr & inode-state: diff --git a/fs/Kconfig b/fs/Kconfig index 53f5c6d..12e77a1 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -530,6 +530,41 @@ config FUSE_FS If you want to develop a userspace FS, or if you want to use a filesystem based on FUSE, answer Y or M. +menu "Caches" + +config FSCACHE + tristate "General filesystem cache manager" + depends on EXPERIMENTAL + help + This option enables a generic filesystem caching manager that can be + used by various network and other filesystems to cache data + locally. Different sorts of caches can be plugged in, depending on the + resources available. + + See Documentation/filesystems/caching/fscache.txt for more information. + +config CACHEFILES + tristate "Filesystem caching on files" + depends on FSCACHE + help + This permits use of a mounted filesystem as a cache for other + filesystems - primarily networking filesystems - thus allowing fast + local disk to enhance the speed of slower devices. + + See Documentation/filesystems/caching/cachefiles.txt for more + information. + +config CACHEFILES_DEBUG + bool "Debug CacheFiles" + depends on CACHEFILES + help + This permits debugging to be dynamically enabled in the filesystem + caching on files module. If this is set, the debugging output may be + enabled by setting bits in /proc/sys/fs/cachefiles/debug or by + including a debugging specifier in /etc/cachefilesd.conf. + +endmenu + menu "CD-ROM/DVD Filesystems" config ISO9660_FS @@ -1470,6 +1505,13 @@ config NFS_V4 If unsure, say N. 
+config NFS_FSCACHE + bool "Provide NFS client caching support (EXPERIMENTAL)" + depends on NFS_FS && FSCACHE && EXPERIMENTAL + help + Say Y here if you want NFS data to be cached locally on disc through + the general filesystem cache manager + config NFS_DIRECTIO bool "Allow direct I/O on NFS files (EXPERIMENTAL)" depends on NFS_FS && EXPERIMENTAL @@ -1906,6 +1948,13 @@ # for fs/nls/Config.in If unsure, say N. +config AFS_FSCACHE + bool "Provide AFS client caching support" + depends on AFS_FS && FSCACHE && EXPERIMENTAL + help + Say Y here if you want AFS data to be cached locally on disc through the + generic filesystem cache manager + config RXRPC tristate diff --git a/fs/Makefile b/fs/Makefile index 8913542..e5efb4b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -52,6 +52,7 @@ obj-y += devpts/ obj-$(CONFIG_PROFILING) += dcookies.o # Do not add any filesystems before this line +obj-$(CONFIG_FSCACHE) += fscache/ obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 obj-$(CONFIG_JBD) += jbd/ @@ -100,5 +101,6 @@ obj-$(CONFIG_AFS_FS) += afs/ obj-$(CONFIG_BEFS_FS) += befs/ obj-$(CONFIG_HOSTFS) += hostfs/ obj-$(CONFIG_HPPFS) += hppfs/ +obj-$(CONFIG_CACHEFILES) += cachefiles/ obj-$(CONFIG_DEBUG_FS) += debugfs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ diff --git a/fs/afs/cache.h b/fs/afs/cache.h deleted file mode 100644 index 9eb7722..0000000 --- a/fs/afs/cache.h +++ /dev/null @@ -1,27 +0,0 @@ -/* cache.h: AFS local cache management interface - * - * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#ifndef _LINUX_AFS_CACHE_H -#define _LINUX_AFS_CACHE_H - -#undef AFS_CACHING_SUPPORT - -#include -#ifdef AFS_CACHING_SUPPORT -#include -#endif -#include "types.h" - -#ifdef __KERNEL__ - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_AFS_CACHE_H */ diff --git a/fs/afs/cell.c b/fs/afs/cell.c index bfc1fd2..3aaeada 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -31,17 +31,21 @@ static DEFINE_RWLOCK(afs_cells_lock); static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ static struct afs_cell *afs_cell_root; -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_cell_cache_match(void *target, - const void *entry); -static void afs_cell_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_cache_cell_index_def = { - .name = "cell_ix", - .data_size = sizeof(struct afs_cache_cell), - .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 }, - .match = afs_cell_cache_match, - .update = afs_cell_cache_update, +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static fscache_checkaux_t afs_cell_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen); + +static struct fscache_cookie_def afs_cell_cache_index_def = { + .name = "AFS cell", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = afs_cell_cache_get_key, + .get_aux = afs_cell_cache_get_aux, + .check_aux = afs_cell_cache_check_aux, }; #endif @@ -115,12 +119,11 @@ int afs_cell_create(const char *name, ch if (ret < 0) goto error; -#ifdef AFS_CACHING_SUPPORT - /* put it up for caching */ - cachefs_acquire_cookie(afs_cache_netfs.primary_index, - &afs_vlocation_cache_index_def, - cell, - &cell->cache); +#ifdef CONFIG_AFS_FSCACHE + /* put it up for caching (this never returns an error) */ + cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, + &afs_cell_cache_index_def, + cell); #endif /* add to the cell lists */ @@ -345,8 +348,8 @@ static void afs_cell_destroy(struct afs_ list_del_init(&cell->proc_link); up_write(&afs_proc_cells_sem); -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(cell->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(cell->cache, 0); #endif up_write(&afs_cells_sem); @@ -525,44 +528,62 @@ void afs_cell_purge(void) /*****************************************************************************/ /* - * match a cell record obtained from the cache + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_cell_cache_match(void *target, - const void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct afs_cache_cell *ccell = entry; - struct afs_cell *cell = target; + const struct afs_cell *cell = cookie_netfs_data; + uint16_t klen; - _enter("{%s},{%s}", ccell->name, cell->name); + _enter("%p,%p,%u", cell, buffer, bufmax); - if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) { - _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; - } + klen = strlen(cell->name); + if (klen > bufmax) + return 0; + + memcpy(buffer, cell->name, klen); + return klen; - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; -} /* end afs_cell_cache_match() */ +} /* end afs_cell_cache_get_key() */ #endif /*****************************************************************************/ /* - * update a cell record in the cache + * 
provide new auxilliary cache data */ -#ifdef AFS_CACHING_SUPPORT -static void afs_cell_cache_update(void *source, void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - struct afs_cache_cell *ccell = entry; - struct afs_cell *cell = source; + const struct afs_cell *cell = cookie_netfs_data; + uint16_t dlen; - _enter("%p,%p", source, entry); + _enter("%p,%p,%u", cell, buffer, bufmax); - strncpy(ccell->name, cell->name, sizeof(ccell->name)); + dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]); + dlen = min(dlen, bufmax); + dlen &= ~(sizeof(cell->vl_addrs[0]) - 1); - memcpy(ccell->vl_servers, - cell->vl_addrs, - min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs))); + memcpy(buffer, cell->vl_addrs, dlen); + + return dlen; + +} /* end afs_cell_cache_get_aux() */ +#endif + +/*****************************************************************************/ +/* + * check that the auxilliary data indicates that the entry is still valid + */ +#ifdef CONFIG_AFS_FSCACHE +static fscache_checkaux_t afs_cell_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen) +{ + _leave(" = OKAY"); + return FSCACHE_CHECKAUX_OKAY; -} /* end afs_cell_cache_update() */ +} /* end afs_cell_cache_check_aux() */ #endif diff --git a/fs/afs/cell.h b/fs/afs/cell.h index 4834910..d670502 100644 --- a/fs/afs/cell.h +++ b/fs/afs/cell.h @@ -13,7 +13,7 @@ #ifndef _LINUX_AFS_CELL_H #define _LINUX_AFS_CELL_H #include "types.h" -#include "cache.h" +#include #define AFS_CELL_MAX_ADDRS 15 @@ -21,16 +21,6 @@ extern volatile int afs_cells_being_purg /*****************************************************************************/ /* - * entry in the cached cell catalogue - */ -struct afs_cache_cell -{ - char name[64]; /* cell name (padded with NULs) */ - struct in_addr vl_servers[15]; /* cached cell VL servers */ -}; - -/*****************************************************************************/ -/* * AFS cell record */ struct afs_cell @@ -39,8 +29,8 @@ struct afs_cell struct list_head link; /* main cell list link */ struct list_head proc_link; /* /proc cell list link */ struct proc_dir_entry *proc_dir; /* /proc dir for this cell */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif /* server record management */ diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 3d097fd..f87d5a7 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -24,7 +24,7 @@ #include "cmservice.h" #include "internal.h" static unsigned afscm_usage; /* AFS cache manager usage count */ -static struct rw_semaphore afscm_sem; /* AFS cache manager start/stop semaphore */ +static DECLARE_RWSEM(afscm_sem); /* AFS cache manager start/stop semaphore */ static int afscm_new_call(struct rxrpc_call *call); static void afscm_attention(struct rxrpc_call *call); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 2fc9987..9800a07 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -145,7 +145,7 @@ #endif qty /= sizeof(union afs_dir_block); /* check them */ - dbuf = page_address(page); + dbuf = kmap_atomic(page, KM_USER0); for (tmp = 0; tmp < qty; tmp++) { if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n", @@ -154,12 +154,12 @@ #endif goto error; } } + kunmap_atomic(dbuf, KM_USER0); - SetPageChecked(page); return; error: - SetPageChecked(page); + kunmap_atomic(dbuf, KM_USER0); 
SetPageError(page); } /* end afs_dir_check_page() */ @@ -170,7 +170,6 @@ #endif */ static inline void afs_dir_put_page(struct page *page) { - kunmap(page); page_cache_release(page); } /* end afs_dir_put_page() */ @@ -188,11 +187,9 @@ static struct page *afs_dir_get_page(str page = read_mapping_page(dir->i_mapping, index, NULL); if (!IS_ERR(page)) { wait_on_page_locked(page); - kmap(page); if (!PageUptodate(page)) goto fail; - if (!PageChecked(page)) - afs_dir_check_page(dir, page); + afs_dir_check_page(dir, page); if (PageError(page)) goto fail; } @@ -357,7 +354,7 @@ static int afs_dir_iterate(struct inode limit = blkoff & ~(PAGE_SIZE - 1); - dbuf = page_address(page); + dbuf = kmap_atomic(page, KM_USER0); /* deal with the individual blocks stashed on this page */ do { @@ -366,6 +363,7 @@ static int afs_dir_iterate(struct inode ret = afs_dir_iterate_block(fpos, dblock, blkoff, cookie, filldir); if (ret != 1) { + kunmap_atomic(dbuf, KM_USER0); afs_dir_put_page(page); goto out; } @@ -374,6 +372,7 @@ static int afs_dir_iterate(struct inode } while (*fpos < dir->i_size && blkoff < limit); + kunmap_atomic(dbuf, KM_USER0); afs_dir_put_page(page); ret = 0; } diff --git a/fs/afs/file.c b/fs/afs/file.c index 67d6634..e8e3680 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -16,12 +16,15 @@ #include #include #include #include +#include #include #include "volume.h" #include "vnode.h" #include #include "internal.h" +#define list_to_page(head) (list_entry((head)->prev, struct page, lru)) + #if 0 static int afs_file_open(struct inode *inode, struct file *file); static int afs_file_release(struct inode *inode, struct file *file); @@ -30,34 +33,74 @@ #endif static int afs_file_readpage(struct file *file, struct page *page); static void afs_file_invalidatepage(struct page *page, unsigned long offset); static int afs_file_releasepage(struct page *page, gfp_t gfp_flags); +static int afs_file_mmap(struct file * file, struct vm_area_struct * vma); + +#ifdef CONFIG_AFS_FSCACHE +static int afs_file_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages); +static int afs_file_page_mkwrite(struct vm_area_struct *vma, struct page *page); +#endif struct inode_operations afs_file_inode_operations = { .getattr = afs_inode_getattr, }; +const struct file_operations afs_file_file_operations = { + .llseek = generic_file_llseek, + .read = generic_file_read, + .mmap = afs_file_mmap, + .sendfile = generic_file_sendfile, +}; + const struct address_space_operations afs_fs_aops = { .readpage = afs_file_readpage, +#ifdef CONFIG_AFS_FSCACHE + .readpages = afs_file_readpages, +#endif .sync_page = block_sync_page, .set_page_dirty = __set_page_dirty_nobuffers, .releasepage = afs_file_releasepage, .invalidatepage = afs_file_invalidatepage, }; +static struct vm_operations_struct afs_fs_vm_operations = { + .nopage = filemap_nopage, + .populate = filemap_populate, +#ifdef CONFIG_AFS_FSCACHE + .page_mkwrite = afs_file_page_mkwrite, +#endif +}; + +/*****************************************************************************/ +/* + * set up a memory mapping on an AFS file + * - we set our own VMA ops so that we can catch the page becoming writable for + * userspace for shared-writable mmap + */ +static int afs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + _enter(""); + + file_accessed(file); + vma->vm_ops = &afs_fs_vm_operations; + return 0; + +} /* end afs_file_mmap() */ + /*****************************************************************************/ /* * deal with 
notification that a page was read from the cache */ -#ifdef AFS_CACHING_SUPPORT -static void afs_file_readpage_read_complete(void *cookie_data, - struct page *page, +#ifdef CONFIG_AFS_FSCACHE +static void afs_file_readpage_read_complete(struct page *page, void *data, int error) { - _enter("%p,%p,%p,%d", cookie_data, page, data, error); + _enter("%p,%p,%d", page, data, error); - if (error) - SetPageError(page); - else + /* if the read completes with an error, we just unlock the page and let + * the VM reissue the readpage */ + if (!error) SetPageUptodate(page); unlock_page(page); @@ -68,15 +111,16 @@ #endif /* * deal with notification that a page was written to the cache */ -#ifdef AFS_CACHING_SUPPORT -static void afs_file_readpage_write_complete(void *cookie_data, - struct page *page, +#ifdef CONFIG_AFS_FSCACHE +static void afs_file_readpage_write_complete(struct page *page, void *data, int error) { - _enter("%p,%p,%p,%d", cookie_data, page, data, error); + _enter("%p,%p,%d", page, data, error); - unlock_page(page); + /* note that the page has been written to the cache and can now be + * modified */ + end_page_fs_misc(page); } /* end afs_file_readpage_write_complete() */ #endif @@ -88,16 +132,13 @@ #endif static int afs_file_readpage(struct file *file, struct page *page) { struct afs_rxfs_fetch_descriptor desc; -#ifdef AFS_CACHING_SUPPORT - struct cachefs_page *pageio; -#endif struct afs_vnode *vnode; struct inode *inode; int ret; inode = page->mapping->host; - _enter("{%lu},{%lu}", inode->i_ino, page->index); + _enter("{%lu},%p{%lu}", inode->i_ino, page, page->index); vnode = AFS_FS_I(inode); @@ -107,13 +148,9 @@ #endif if (vnode->flags & AFS_VNODE_DELETED) goto error; -#ifdef AFS_CACHING_SUPPORT - ret = cachefs_page_get_private(page, &pageio, GFP_NOIO); - if (ret < 0) - goto error; - +#ifdef CONFIG_AFS_FSCACHE /* is it cached? 
*/ - ret = cachefs_read_or_alloc_page(vnode->cache, + ret = fscache_read_or_alloc_page(vnode->cache, page, afs_file_readpage_read_complete, NULL, @@ -123,18 +160,20 @@ #else #endif switch (ret) { - /* read BIO submitted and wb-journal entry found */ - case 1: - BUG(); // TODO - handle wb-journal match - /* read BIO submitted (page in cache) */ case 0: break; - /* no page available in cache */ - case -ENOBUFS: + /* page not yet cached */ case -ENODATA: + _debug("cache said ENODATA"); + goto go_on; + + /* page will not be cached */ + case -ENOBUFS: + _debug("cache said ENOBUFS"); default: + go_on: desc.fid = vnode->fid; desc.offset = page->index << PAGE_CACHE_SHIFT; desc.size = min((size_t) (inode->i_size - desc.offset), @@ -148,34 +187,40 @@ #endif ret = afs_vnode_fetch_data(vnode, &desc); kunmap(page); if (ret < 0) { - if (ret==-ENOENT) { - _debug("got NOENT from server" + if (ret == -ENOENT) { + kdebug("got NOENT from server" " - marking file deleted and stale"); vnode->flags |= AFS_VNODE_DELETED; ret = -ESTALE; } -#ifdef AFS_CACHING_SUPPORT - cachefs_uncache_page(vnode->cache, page); +#ifdef CONFIG_AFS_FSCACHE + fscache_uncache_page(vnode->cache, page); + ClearPagePrivate(page); #endif goto error; } SetPageUptodate(page); -#ifdef AFS_CACHING_SUPPORT - if (cachefs_write_page(vnode->cache, - page, - afs_file_readpage_write_complete, - NULL, - GFP_KERNEL) != 0 - ) { - cachefs_uncache_page(vnode->cache, page); - unlock_page(page); + /* send the page to the cache */ +#ifdef CONFIG_AFS_FSCACHE + if (PagePrivate(page)) { + if (TestSetPageFsMisc(page)) + BUG(); + if (fscache_write_page(vnode->cache, + page, + afs_file_readpage_write_complete, + NULL, + GFP_KERNEL) != 0 + ) { + fscache_uncache_page(vnode->cache, page); + ClearPagePrivate(page); + end_page_fs_misc(page); + } } -#else - unlock_page(page); #endif + unlock_page(page); } _leave(" = 0"); @@ -192,20 +237,63 @@ #endif /*****************************************************************************/ /* - * get a page cookie for the specified page + * read a set of pages */ -#ifdef AFS_CACHING_SUPPORT -int afs_cache_get_page_cookie(struct page *page, - struct cachefs_page **_page_cookie) +#ifdef CONFIG_AFS_FSCACHE +static int afs_file_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) { - int ret; + struct afs_vnode *vnode; +#if 0 + struct pagevec lru_pvec; + unsigned page_idx; +#endif + int ret = 0; - _enter(""); - ret = cachefs_page_get_private(page,_page_cookie, GFP_NOIO); + _enter(",{%lu},,%d", mapping->host->i_ino, nr_pages); - _leave(" = %d", ret); + vnode = AFS_FS_I(mapping->host); + if (vnode->flags & AFS_VNODE_DELETED) { + _leave(" = -ESTALE"); + return -ESTALE; + } + + /* attempt to read as many of the pages as possible */ + ret = fscache_read_or_alloc_pages(vnode->cache, + mapping, + pages, + &nr_pages, + afs_file_readpage_read_complete, + NULL, + mapping_gfp_mask(mapping)); + + switch (ret) { + /* all pages are being read from the cache */ + case 0: + BUG_ON(!list_empty(pages)); + BUG_ON(nr_pages != 0); + _leave(" = 0 [reading all]"); + return 0; + + /* there were pages that couldn't be read from the cache */ + case -ENODATA: + case -ENOBUFS: + break; + + /* other error */ + default: + _leave(" = %d", ret); + return ret; + } + + /* load the missing pages from the network */ + ret = read_cache_pages(mapping, pages, + (void *) afs_file_readpage, NULL); + + _leave(" = %d [netting]", ret); return ret; -} /* end afs_cache_get_page_cookie() */ + +} /* end afs_file_readpages() */ 
#endif /*****************************************************************************/ @@ -214,35 +302,22 @@ #endif */ static void afs_file_invalidatepage(struct page *page, unsigned long offset) { - int ret = 1; - _enter("{%lu},%lu", page->index, offset); BUG_ON(!PageLocked(page)); if (PagePrivate(page)) { -#ifdef AFS_CACHING_SUPPORT - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - cachefs_uncache_page(vnode->cache,page); -#endif - /* We release buffers only if the entire page is being * invalidated. * The get_block cached value has been unconditionally * invalidated, so real IO is not possible anymore. */ - if (offset == 0) { - BUG_ON(!PageLocked(page)); - - ret = 0; - if (!PageWriteback(page)) - ret = page->mapping->a_ops->releasepage(page, - 0); - /* possibly should BUG_ON(!ret); - neilb */ - } + if (offset == 0 && !PageWriteback(page)) + page->mapping->a_ops->releasepage(page, 0); } - _leave(" = %d", ret); + _leave(""); + } /* end afs_file_invalidatepage() */ /*****************************************************************************/ @@ -251,23 +326,30 @@ #endif */ static int afs_file_releasepage(struct page *page, gfp_t gfp_flags) { - struct cachefs_page *pageio; - _enter("{%lu},%x", page->index, gfp_flags); - if (PagePrivate(page)) { -#ifdef AFS_CACHING_SUPPORT - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - cachefs_uncache_page(vnode->cache, page); +#ifdef CONFIG_AFS_FSCACHE + wait_on_page_fs_misc(page); + fscache_uncache_page(AFS_FS_I(page->mapping->host)->cache, page); + ClearPagePrivate(page); #endif - pageio = (struct cachefs_page *) page_private(page); - set_page_private(page, 0); - ClearPagePrivate(page); + /* indicate that the page can be released */ + _leave(" = 1"); + return 1; - kfree(pageio); - } +} /* end afs_file_releasepage() */ - _leave(" = 0"); +/*****************************************************************************/ +/* + * wait for the disc cache to finish writing before permitting modification of + * our page in the page cache + */ +#ifdef CONFIG_AFS_FSCACHE +static int afs_file_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + wait_on_page_fs_misc(page); return 0; -} /* end afs_file_releasepage() */ + +} /* end afs_file_page_mkwrite() */ +#endif diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 61bc371..c88c41a 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -398,6 +398,8 @@ int afs_rxfs_fetch_file_status(struct af bp++; /* spare6 */ } + _debug("Data Version %llx\n", vnode->status.version); + /* success */ ret = 0; @@ -408,7 +410,7 @@ int afs_rxfs_fetch_file_status(struct af out_put_conn: afs_server_release_callslot(server, &callslot); out: - _leave(""); + _leave(" = %d", ret); return ret; abort: diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 4ebb30a..0a59eda 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -49,7 +49,7 @@ static int afs_inode_map_status(struct a case AFS_FTYPE_FILE: inode->i_mode = S_IFREG | vnode->status.mode; inode->i_op = &afs_file_inode_operations; - inode->i_fop = &generic_ro_fops; + inode->i_fop = &afs_file_file_operations; break; case AFS_FTYPE_DIR: inode->i_mode = S_IFDIR | vnode->status.mode; @@ -65,6 +65,11 @@ static int afs_inode_map_status(struct a return -EBADMSG; } +#ifdef CONFIG_AFS_FSCACHE + if (vnode->status.size != inode->i_size) + fscache_set_i_size(vnode->cache, vnode->status.size); +#endif + inode->i_nlink = vnode->status.nlink; inode->i_uid = vnode->status.owner; inode->i_gid = 0; @@ -101,13 +106,33 @@ static int afs_inode_fetch_status(struct struct 
afs_vnode *vnode; int ret; + _enter(""); + vnode = AFS_FS_I(inode); ret = afs_vnode_fetch_status(vnode); - if (ret == 0) + if (ret == 0) { +#ifdef CONFIG_AFS_FSCACHE + if (!vnode->cache) { + vnode->cache = + fscache_acquire_cookie(vnode->volume->cache, + &afs_vnode_cache_index_def, + vnode); + if (!vnode->cache) + printk("Negative\n"); + } +#endif ret = afs_inode_map_status(vnode); +#ifdef CONFIG_AFS_FSCACHE + if (ret < 0) { + fscache_relinquish_cookie(vnode->cache, 0); + vnode->cache = NULL; + } +#endif + } + _leave(" = %d", ret); return ret; } /* end afs_inode_fetch_status() */ @@ -122,6 +147,7 @@ static int afs_iget5_test(struct inode * return inode->i_ino == data->fid.vnode && inode->i_version == data->fid.unique; + } /* end afs_iget5_test() */ /*****************************************************************************/ @@ -179,20 +205,11 @@ inline int afs_iget(struct super_block * return ret; } -#ifdef AFS_CACHING_SUPPORT - /* set up caching before reading the status, as fetch-status reads the - * first page of symlinks to see if they're really mntpts */ - cachefs_acquire_cookie(vnode->volume->cache, - NULL, - vnode, - &vnode->cache); -#endif - /* okay... it's a new inode */ inode->i_flags |= S_NOATIME; vnode->flags |= AFS_VNODE_CHANGED; ret = afs_inode_fetch_status(inode); - if (ret<0) + if (ret < 0) goto bad_inode; /* success */ @@ -278,8 +295,8 @@ void afs_clear_inode(struct inode *inode afs_vnode_give_up_callback(vnode); -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(vnode->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(vnode->cache, 0); vnode->cache = NULL; #endif diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e88b3b6..482dbd1 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -16,15 +16,17 @@ #include #include #include #include +#include /* * debug tracing */ -#define kenter(FMT, a...) printk("==> %s("FMT")\n",__FUNCTION__ , ## a) -#define kleave(FMT, a...) printk("<== %s()"FMT"\n",__FUNCTION__ , ## a) -#define kdebug(FMT, a...) printk(FMT"\n" , ## a) -#define kproto(FMT, a...) printk("### "FMT"\n" , ## a) -#define knet(FMT, a...) printk(FMT"\n" , ## a) +#define __kdbg(FMT, a...) printk("[%05d] "FMT"\n", current->pid , ## a) +#define kenter(FMT, a...) __kdbg("==> %s("FMT")", __FUNCTION__ , ## a) +#define kleave(FMT, a...) __kdbg("<== %s()"FMT, __FUNCTION__ , ## a) +#define kdebug(FMT, a...) __kdbg(FMT , ## a) +#define kproto(FMT, a...) __kdbg("### "FMT , ## a) +#define knet(FMT, a...) __kdbg(FMT , ## a) #ifdef __KDEBUG #define _enter(FMT, a...) 
kenter(FMT , ## a) @@ -56,9 +58,6 @@ static inline void afs_discard_my_signal */ extern struct rw_semaphore afs_proc_cells_sem; extern struct list_head afs_proc_cells; -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_cache_cell_index_def; -#endif /* * dir.c @@ -71,11 +70,7 @@ extern const struct file_operations afs_ */ extern const struct address_space_operations afs_fs_aops; extern struct inode_operations afs_file_inode_operations; - -#ifdef AFS_CACHING_SUPPORT -extern int afs_cache_get_page_cookie(struct page *page, - struct cachefs_page **_page_cookie); -#endif +extern const struct file_operations afs_file_file_operations; /* * inode.c @@ -97,8 +92,8 @@ #endif /* * main.c */ -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_netfs afs_cache_netfs; +#ifdef CONFIG_AFS_FSCACHE +extern struct fscache_netfs afs_cache_netfs; #endif /* diff --git a/fs/afs/main.c b/fs/afs/main.c index 913c689..5840bb2 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -1,6 +1,6 @@ /* main.c: AFS client file system * - * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2002,5 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -14,11 +14,11 @@ #include #include #include #include +#include #include #include #include #include -#include "cache.h" #include "cell.h" #include "server.h" #include "fsclient.h" @@ -51,12 +51,11 @@ static struct rxrpc_peer_ops afs_peer_op struct list_head afs_cb_hash_tbl[AFS_CB_HASH_COUNT]; DEFINE_SPINLOCK(afs_cb_hash_lock); -#ifdef AFS_CACHING_SUPPORT -static struct cachefs_netfs_operations afs_cache_ops = { - .get_page_cookie = afs_cache_get_page_cookie, +#ifdef CONFIG_AFS_FSCACHE +static struct fscache_netfs_operations afs_cache_ops = { }; -struct cachefs_netfs afs_cache_netfs = { +struct fscache_netfs afs_cache_netfs = { .name = "afs", .version = 0, .ops = &afs_cache_ops, @@ -83,10 +82,9 @@ static int __init afs_init(void) if (ret < 0) return ret; -#ifdef AFS_CACHING_SUPPORT +#ifdef CONFIG_AFS_FSCACHE /* we want to be able to cache */ - ret = cachefs_register_netfs(&afs_cache_netfs, - &afs_cache_cell_index_def); + ret = fscache_register_netfs(&afs_cache_netfs); if (ret < 0) goto error; #endif @@ -137,8 +135,8 @@ #ifdef CONFIG_KEYS_TURNED_OFF afs_key_unregister(); error_cache: #endif -#ifdef AFS_CACHING_SUPPORT - cachefs_unregister_netfs(&afs_cache_netfs); +#ifdef CONFIG_AFS_FSCACHE + fscache_unregister_netfs(&afs_cache_netfs); error: #endif afs_cell_purge(); @@ -167,8 +165,8 @@ static void __exit afs_exit(void) #ifdef CONFIG_KEYS_TURNED_OFF afs_key_unregister(); #endif -#ifdef AFS_CACHING_SUPPORT - cachefs_unregister_netfs(&afs_cache_netfs); +#ifdef CONFIG_AFS_FSCACHE + fscache_unregister_netfs(&afs_cache_netfs); #endif afs_proc_cleanup(); diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 99785a7..2a53d51 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -78,7 +78,7 @@ int afs_mntpt_check_symlink(struct afs_v ret = -EIO; wait_on_page_locked(page); - buf = kmap(page); + buf = kmap_atomic(page, KM_USER0); if (!PageUptodate(page)) goto out_free; if (PageError(page)) @@ -101,7 +101,7 @@ int afs_mntpt_check_symlink(struct afs_v ret = 0; out_free: - kunmap(page); + kunmap_atomic(buf, KM_USER0); page_cache_release(page); out: _leave(" = %d", ret); @@ -188,9 +188,9 @@ static struct vfsmount *afs_mntpt_do_aut if (!PageUptodate(page) || PageError(page)) goto error; - buf = kmap(page); + buf = kmap_atomic(page, KM_USER0); memcpy(devname, buf, size); - 
kunmap(page); + kunmap_atomic(buf, KM_USER0); page_cache_release(page); page = NULL; @@ -269,12 +269,12 @@ static void *afs_mntpt_follow_link(struc */ static void afs_mntpt_expiry_timed_out(struct afs_timer *timer) { - kenter(""); +// kenter(""); mark_mounts_for_expiry(&afs_vfsmounts); afs_kafstimod_add_timer(&afs_mntpt_expiry_timer, afs_mntpt_expiry_timeout * HZ); - kleave(""); +// kleave(""); } /* end afs_mntpt_expiry_timed_out() */ diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 101d21b..db58488 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -177,6 +177,7 @@ int afs_proc_init(void) */ void afs_proc_cleanup(void) { + remove_proc_entry("rootcell", proc_afs); remove_proc_entry("cells", proc_afs); remove_proc_entry("fs/afs", NULL); diff --git a/fs/afs/server.c b/fs/afs/server.c index 22afaae..e94628c 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -375,7 +375,6 @@ int afs_server_request_callslot(struct a else if (list_empty(&server->fs_callq)) { /* no one waiting */ server->fs_conn_cnt[nconn]++; - spin_unlock(&server->fs_lock); } else { /* someone's waiting - dequeue them and wake them up */ @@ -393,9 +392,9 @@ int afs_server_request_callslot(struct a } pcallslot->ready = 1; wake_up_process(pcallslot->task); - spin_unlock(&server->fs_lock); } + spin_unlock(&server->fs_lock); rxrpc_put_connection(callslot->conn); callslot->conn = NULL; diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 331f730..20148bc 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -59,17 +59,21 @@ static LIST_HEAD(afs_vlocation_update_pe static struct afs_vlocation *afs_vlocation_update; /* VL currently being updated */ static DEFINE_SPINLOCK(afs_vlocation_update_lock); /* lock guarding update queue */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vlocation_cache_match(void *target, - const void *entry); -static void afs_vlocation_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_vlocation_cache_index_def = { - .name = "vldb", - .data_size = sizeof(struct afs_cache_vlocation), - .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 }, - .match = afs_vlocation_cache_match, - .update = afs_vlocation_cache_update, +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static fscache_checkaux_t afs_vlocation_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen); + +static struct fscache_cookie_def afs_vlocation_cache_index_def = { + .name = "AFS.vldb", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = afs_vlocation_cache_get_key, + .get_aux = afs_vlocation_cache_get_aux, + .check_aux = afs_vlocation_cache_check_aux, }; #endif @@ -300,13 +304,12 @@ int afs_vlocation_lookup(struct afs_cell list_add_tail(&vlocation->link, &cell->vl_list); -#ifdef AFS_CACHING_SUPPORT +#ifdef CONFIG_AFS_FSCACHE /* we want to store it in the cache, plus it might already be * encached */ - cachefs_acquire_cookie(cell->cache, - &afs_volume_cache_index_def, - vlocation, - &vlocation->cache); + vlocation->cache = fscache_acquire_cookie(cell->cache, + &afs_vlocation_cache_index_def, + vlocation); if (vlocation->valid) goto found_in_cache; @@ -340,7 +343,7 @@ #endif active: active = 1; -#ifdef AFS_CACHING_SUPPORT +#ifdef CONFIG_AFS_FSCACHE found_in_cache: #endif /* try to look up a cached volume in the cell VL databases by ID */ @@ -422,9 +425,9 @@ #endif 
afs_kafstimod_add_timer(&vlocation->upd_timer, 10 * HZ); -#ifdef AFS_CACHING_SUPPORT +#ifdef CONFIG_AFS_FSCACHE /* update volume entry in local cache */ - cachefs_update_cookie(vlocation->cache); + fscache_update_cookie(vlocation->cache); #endif *_vlocation = vlocation; @@ -438,8 +441,8 @@ #endif } else { list_del(&vlocation->link); -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(vlocation->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(vlocation->cache, 0); #endif afs_put_cell(vlocation->cell); kfree(vlocation); @@ -536,8 +539,8 @@ void afs_vlocation_do_timeout(struct afs } /* we can now destroy it properly */ -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(vlocation->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(vlocation->cache, 0); #endif afs_put_cell(cell); @@ -888,65 +891,103 @@ static void afs_vlocation_update_discard /*****************************************************************************/ /* - * match a VLDB record stored in the cache - * - may also load target from entry + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vlocation_cache_match(void *target, - const void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct afs_cache_vlocation *vldb = entry; - struct afs_vlocation *vlocation = target; + const struct afs_vlocation *vlocation = cookie_netfs_data; + uint16_t klen; - _enter("{%s},{%s}", vlocation->vldb.name, vldb->name); + _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); - if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0 - ) { - if (!vlocation->valid || - vlocation->vldb.rtime == vldb->rtime - ) { - vlocation->vldb = *vldb; - vlocation->valid = 1; - _leave(" = SUCCESS [c->m]"); - return CACHEFS_MATCH_SUCCESS; - } - /* need to update cache if cached info differs */ - else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) { - /* delete if VIDs for this name differ */ - if (memcmp(&vlocation->vldb.vid, - &vldb->vid, - sizeof(vldb->vid)) != 0) { - _leave(" = DELETE"); - return CACHEFS_MATCH_SUCCESS_DELETE; - } + klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name)); + if (klen > bufmax) + return 0; - _leave(" = UPDATE"); - return CACHEFS_MATCH_SUCCESS_UPDATE; - } - else { - _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; - } - } + memcpy(buffer, vlocation->vldb.name, klen); + + _leave(" = %u", klen); + return klen; - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; -} /* end afs_vlocation_cache_match() */ +} /* end afs_vlocation_cache_get_key() */ #endif /*****************************************************************************/ /* - * update a VLDB record stored in the cache + * provide new auxilliary cache data */ -#ifdef AFS_CACHING_SUPPORT -static void afs_vlocation_cache_update(void *source, void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - struct afs_cache_vlocation *vldb = entry; - struct afs_vlocation *vlocation = source; + const struct afs_vlocation *vlocation = cookie_netfs_data; + uint16_t dlen; - _enter(""); + _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); + + dlen = sizeof(struct afs_cache_vlocation); + dlen -= offsetof(struct afs_cache_vlocation, nservers); + if (dlen > bufmax) + return 0; + + memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen); + + _leave(" = 
%u", dlen); + return dlen; + +} /* end afs_vlocation_cache_get_aux() */ +#endif + +/*****************************************************************************/ +/* + * check that the auxilliary data indicates that the entry is still valid + */ +#ifdef CONFIG_AFS_FSCACHE +static fscache_checkaux_t afs_vlocation_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen) +{ + const struct afs_cache_vlocation *cvldb; + struct afs_vlocation *vlocation = cookie_netfs_data; + uint16_t dlen; + + _enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen); + + /* check the size of the data is what we're expecting */ + dlen = sizeof(struct afs_cache_vlocation); + dlen -= offsetof(struct afs_cache_vlocation, nservers); + if (dlen != buflen) + return FSCACHE_CHECKAUX_OBSOLETE; + + cvldb = container_of(buffer, struct afs_cache_vlocation, nservers); + + /* if what's on disk is more valid than what's in memory, then use the + * VL record from the cache */ + if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) { + memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen); + vlocation->valid = 1; + _leave(" = SUCCESS [c->m]"); + return FSCACHE_CHECKAUX_OKAY; + } + + /* need to update the cache if the cached info differs */ + if (memcmp(&vlocation->vldb, buffer, dlen) != 0) { + /* delete if the volume IDs for this name differ */ + if (memcmp(&vlocation->vldb.vid, &cvldb->vid, + sizeof(cvldb->vid)) != 0 + ) { + _leave(" = OBSOLETE"); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + _leave(" = UPDATE"); + return FSCACHE_CHECKAUX_NEEDS_UPDATE; + } - *vldb = vlocation->vldb; + _leave(" = OKAY"); + return FSCACHE_CHECKAUX_OKAY; -} /* end afs_vlocation_cache_update() */ +} /* end afs_vlocation_cache_check_aux() */ #endif diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index cf62da5..b6cba1e 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c @@ -29,17 +29,30 @@ struct afs_timer_ops afs_vnode_cb_timed_ .timed_out = afs_vnode_cb_timed_out, }; -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vnode_cache_match(void *target, - const void *entry); -static void afs_vnode_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_vnode_cache_index_def = { - .name = "vnode", - .data_size = sizeof(struct afs_cache_vnode), - .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 4 }, - .match = afs_vnode_cache_match, - .update = afs_vnode_cache_update, +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, + uint64_t *size); +static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static fscache_checkaux_t afs_vnode_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen); +static void afs_vnode_cache_mark_pages_cached(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec); +static void afs_vnode_cache_now_uncached(void *cookie_netfs_data); + +struct fscache_cookie_def afs_vnode_cache_index_def = { + .name = "AFS.vnode", + .type = FSCACHE_COOKIE_TYPE_DATAFILE, + .get_key = afs_vnode_cache_get_key, + .get_attr = afs_vnode_cache_get_attr, + .get_aux = afs_vnode_cache_get_aux, + .check_aux = afs_vnode_cache_check_aux, + .mark_pages_cached = afs_vnode_cache_mark_pages_cached, + .now_uncached = afs_vnode_cache_now_uncached, }; #endif @@ -188,6 +201,8 @@ int afs_vnode_fetch_status(struct afs_vn if (vnode->update_cnt > 0) { /* someone else started 
a fetch */ + _debug("conflict"); + set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&vnode->update_waitq, &myself); @@ -219,6 +234,7 @@ int afs_vnode_fetch_status(struct afs_vn spin_unlock(&vnode->lock); set_current_state(TASK_RUNNING); + _leave(" [conflicted, %d", !!(vnode->flags & AFS_VNODE_DELETED)); return vnode->flags & AFS_VNODE_DELETED ? -ENOENT : 0; } @@ -341,54 +357,198 @@ int afs_vnode_give_up_callback(struct af /*****************************************************************************/ /* - * match a vnode record stored in the cache + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vnode_cache_match(void *target, - const void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct afs_cache_vnode *cvnode = entry; - struct afs_vnode *vnode = target; + const struct afs_vnode *vnode = cookie_netfs_data; + uint16_t klen; - _enter("{%x,%x,%Lx},{%x,%x,%Lx}", - vnode->fid.vnode, - vnode->fid.unique, - vnode->status.version, - cvnode->vnode_id, - cvnode->vnode_unique, - cvnode->data_version); - - if (vnode->fid.vnode != cvnode->vnode_id) { - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; + _enter("{%x,%x,%Lx},%p,%u", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version, + buffer, bufmax); + + klen = sizeof(vnode->fid.vnode); + if (klen > bufmax) + return 0; + + memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode)); + + _leave(" = %u", klen); + return klen; + +} /* end afs_vnode_cache_get_key() */ +#endif + +/*****************************************************************************/ +/* + * provide an updated file attributes + */ +#ifdef CONFIG_AFS_FSCACHE +static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, + uint64_t *size) +{ + const struct afs_vnode *vnode = cookie_netfs_data; + + _enter("{%x,%x,%Lx},", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version); + + *size = i_size_read((struct inode *) &vnode->vfs_inode); + +} /* end afs_vnode_cache_get_attr() */ +#endif + +/*****************************************************************************/ +/* + * provide new auxilliary cache data + */ +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct afs_vnode *vnode = cookie_netfs_data; + uint16_t dlen; + + _enter("{%x,%x,%Lx},%p,%u", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version, + buffer, bufmax); + + dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.version); + if (dlen > bufmax) + return 0; + + memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique)); + buffer += sizeof(vnode->fid.unique); + memcpy(buffer, &vnode->status.version, sizeof(vnode->status.version)); + + _leave(" = %u", dlen); + return dlen; + +} /* end afs_vnode_cache_get_aux() */ +#endif + +/*****************************************************************************/ +/* + * check that the auxilliary data indicates that the entry is still valid + */ +#ifdef CONFIG_AFS_FSCACHE +static fscache_checkaux_t afs_vnode_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen) +{ + struct afs_vnode *vnode = cookie_netfs_data; + uint16_t dlen; + + _enter("{%x,%x,%Lx},%p,%u", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version, + buffer, buflen); + + /* check the size of the data is what we're expecting */ + dlen = sizeof(vnode->fid.unique) + 
sizeof(vnode->status.version); + if (dlen != buflen) { + _leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen); + return FSCACHE_CHECKAUX_OBSOLETE; } - if (vnode->fid.unique != cvnode->vnode_unique || - vnode->status.version != cvnode->data_version) { - _leave(" = DELETE"); - return CACHEFS_MATCH_SUCCESS_DELETE; + if (memcmp(buffer, + &vnode->fid.unique, + sizeof(vnode->fid.unique) + ) != 0 + ) { + unsigned unique; + + memcpy(&unique, buffer, sizeof(unique)); + + _leave(" = OBSOLETE [uniq %x != %x]", + unique, vnode->fid.unique); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + if (memcmp(buffer + sizeof(vnode->fid.unique), + &vnode->status.version, + sizeof(vnode->status.version) + ) != 0 + ) { + afs_dataversion_t version; + + memcpy(&version, buffer + sizeof(vnode->fid.unique), + sizeof(version)); + + _leave(" = OBSOLETE [vers %llx != %llx]", + version, vnode->status.version); + return FSCACHE_CHECKAUX_OBSOLETE; } _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; -} /* end afs_vnode_cache_match() */ + return FSCACHE_CHECKAUX_OKAY; + +} /* end afs_vnode_cache_check_aux() */ #endif /*****************************************************************************/ /* - * update a vnode record stored in the cache + * indication of pages that now have cache metadata retained + * - this function should mark the specified pages as now being cached */ -#ifdef AFS_CACHING_SUPPORT -static void afs_vnode_cache_update(void *source, void *entry) +#ifdef CONFIG_AFS_FSCACHE +static void afs_vnode_cache_mark_pages_cached(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec) { - struct afs_cache_vnode *cvnode = entry; - struct afs_vnode *vnode = source; + unsigned long loop; - _enter(""); + for (loop = 0; loop < cached_pvec->nr; loop++) { + struct page *page = cached_pvec->pages[loop]; - cvnode->vnode_id = vnode->fid.vnode; - cvnode->vnode_unique = vnode->fid.unique; - cvnode->data_version = vnode->status.version; + _debug("- mark %p{%lx}", page, page->index); -} /* end afs_vnode_cache_update() */ + SetPagePrivate(page); + } + +} /* end afs_vnode_cache_mark_pages_cached() */ #endif + +/*****************************************************************************/ +/* + * indication the cookie is no longer uncached + * - this function is called when the backing store currently caching a cookie + * is removed + * - the netfs should use this to clean up any markers indicating cached pages + * - this is mandatory for any object that may have data + */ +static void afs_vnode_cache_now_uncached(void *cookie_netfs_data) +{ + struct afs_vnode *vnode = cookie_netfs_data; + struct pagevec pvec; + pgoff_t first; + int loop, nr_pages; + + _enter("{%x,%x,%Lx}", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version); + + pagevec_init(&pvec, 0); + first = 0; + + for (;;) { + /* grab a bunch of pages to clean */ + nr_pages = pagevec_lookup(&pvec, vnode->vfs_inode.i_mapping, + first, + PAGEVEC_SIZE - pagevec_count(&pvec)); + if (!nr_pages) + break; + + for (loop = 0; loop < nr_pages; loop++) + ClearPagePrivate(pvec.pages[loop]); + + first = pvec.pages[nr_pages - 1]->index + 1; + + pvec.nr = nr_pages; + pagevec_release(&pvec); + cond_resched(); + } + + _leave(""); + +} /* end afs_vnode_cache_now_uncached() */ diff --git a/fs/afs/vnode.h b/fs/afs/vnode.h index b86a971..3f0602d 100644 --- a/fs/afs/vnode.h +++ b/fs/afs/vnode.h @@ -13,9 +13,9 @@ #ifndef _LINUX_AFS_VNODE_H #define _LINUX_AFS_VNODE_H #include +#include #include "server.h" #include "kafstimod.h" -#include "cache.h" #ifdef 
__KERNEL__ @@ -32,8 +32,8 @@ struct afs_cache_vnode afs_dataversion_t data_version; /* data version */ }; -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_vnode_cache_index_def; +#ifdef CONFIG_AFS_FSCACHE +extern struct fscache_cookie_def afs_vnode_cache_index_def; #endif /*****************************************************************************/ @@ -47,8 +47,8 @@ struct afs_vnode struct afs_volume *volume; /* volume on which vnode resides */ struct afs_fid fid; /* the file identifier for this inode */ struct afs_file_status status; /* AFS status info for this file */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif wait_queue_head_t update_waitq; /* status fetch waitqueue */ diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 0ff4b86..0bd5578 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -15,10 +15,10 @@ #include #include #include #include +#include #include "volume.h" #include "vnode.h" #include "cell.h" -#include "cache.h" #include "cmservice.h" #include "fsclient.h" #include "vlclient.h" @@ -28,18 +28,14 @@ #ifdef __KDEBUG static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; #endif -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_volume_cache_match(void *target, - const void *entry); -static void afs_volume_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_volume_cache_index_def = { - .name = "volume", - .data_size = sizeof(struct afs_cache_vhash), - .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 1 }, - .keys[1] = { CACHEFS_INDEX_KEYS_BIN, 1 }, - .match = afs_volume_cache_match, - .update = afs_volume_cache_update, +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); + +static struct fscache_cookie_def afs_volume_cache_index_def = { + .name = "AFS.volume", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = afs_volume_cache_get_key, }; #endif @@ -214,11 +210,10 @@ int afs_volume_lookup(const char *name, } /* attach the cache and volume location */ -#ifdef AFS_CACHING_SUPPORT - cachefs_acquire_cookie(vlocation->cache, - &afs_vnode_cache_index_def, - volume, - &volume->cache); +#ifdef CONFIG_AFS_FSCACHE + volume->cache = fscache_acquire_cookie(vlocation->cache, + &afs_volume_cache_index_def, + volume); #endif afs_get_vlocation(vlocation); @@ -286,8 +281,8 @@ void afs_put_volume(struct afs_volume *v up_write(&vlocation->cell->vl_sem); /* finish cleaning up the volume */ -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(volume->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(volume->cache, 0); #endif afs_put_vlocation(vlocation); @@ -481,40 +476,25 @@ int afs_volume_release_fileserver(struct /*****************************************************************************/ /* - * match a volume hash record stored in the cache + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_volume_cache_match(void *target, - const void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct afs_cache_vhash *vhash = entry; - struct afs_volume *volume = target; - - _enter("{%u},{%u}", volume->type, vhash->vtype); + const struct afs_volume *volume = cookie_netfs_data; + uint16_t klen; - if (volume->type == vhash->vtype) { - _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; - } - - 
_leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; -} /* end afs_volume_cache_match() */ -#endif + _enter("{%u},%p,%u", volume->type, buffer, bufmax); -/*****************************************************************************/ -/* - * update a volume hash record stored in the cache - */ -#ifdef AFS_CACHING_SUPPORT -static void afs_volume_cache_update(void *source, void *entry) -{ - struct afs_cache_vhash *vhash = entry; - struct afs_volume *volume = source; + klen = sizeof(volume->type); + if (klen > bufmax) + return 0; - _enter(""); + memcpy(buffer, &volume->type, sizeof(volume->type)); - vhash->vtype = volume->type; + _leave(" = %u", klen); + return klen; -} /* end afs_volume_cache_update() */ +} /* end afs_volume_cache_get_key() */ #endif diff --git a/fs/afs/volume.h b/fs/afs/volume.h index bfdcf19..fc9895a 100644 --- a/fs/afs/volume.h +++ b/fs/afs/volume.h @@ -12,11 +12,11 @@ #ifndef _LINUX_AFS_VOLUME_H #define _LINUX_AFS_VOLUME_H +#include #include "types.h" #include "fsclient.h" #include "kafstimod.h" #include "kafsasyncd.h" -#include "cache.h" typedef enum { AFS_VLUPD_SLEEP, /* sleeping waiting for update timer to fire */ @@ -45,24 +45,6 @@ #define AFS_VOL_VTM_BAK 0x04 /* backup v time_t rtime; /* last retrieval time */ }; -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_vlocation_cache_index_def; -#endif - -/*****************************************************************************/ -/* - * volume -> vnode hash table entry - */ -struct afs_cache_vhash -{ - afs_voltype_t vtype; /* which volume variation */ - uint8_t hash_bucket; /* which hash bucket this represents */ -} __attribute__((packed)); - -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_volume_cache_index_def; -#endif - /*****************************************************************************/ /* * AFS volume location record @@ -73,8 +55,8 @@ struct afs_vlocation struct list_head link; /* link in cell volume location list */ struct afs_timer timeout; /* decaching timer */ struct afs_cell *cell; /* cell to which volume belongs */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif struct afs_cache_vlocation vldb; /* volume information DB record */ struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ @@ -109,8 +91,8 @@ struct afs_volume atomic_t usage; struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */ struct afs_vlocation *vlocation; /* volume location */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif afs_volid_t vid; /* volume ID */ afs_voltype_t type; /* type of volume */ diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index d6603d0..47e38f3 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -96,7 +96,6 @@ #define AUTOFS_TYPE_OFFSET 0x0004 struct autofs_sb_info { u32 magic; - struct dentry *root; int pipefd; struct file *pipe; pid_t oz_pgrp; @@ -231,4 +230,4 @@ out: } void autofs4_dentry_release(struct dentry *); - +extern void autofs4_kill_sb(struct super_block *); diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index 5d91933..723a1c5 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c @@ -24,7 +24,7 @@ static struct file_system_type autofs_fs .owner = THIS_MODULE, .name = "autofs", .get_sb = autofs_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = autofs4_kill_sb, }; 
static int __init init_autofs4_fs(void) diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index fde78b1..1bf68c5 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -95,7 +95,7 @@ void autofs4_free_ino(struct autofs_info */ static void autofs4_force_release(struct autofs_sb_info *sbi) { - struct dentry *this_parent = sbi->root; + struct dentry *this_parent = sbi->sb->s_root; struct list_head *next; spin_lock(&dcache_lock); @@ -126,7 +126,7 @@ resume: spin_lock(&dcache_lock); } - if (this_parent != sbi->root) { + if (this_parent != sbi->sb->s_root) { struct dentry *dentry = this_parent; next = this_parent->d_u.d_child.next; @@ -139,15 +139,9 @@ resume: goto resume; } spin_unlock(&dcache_lock); - - dput(sbi->root); - sbi->root = NULL; - shrink_dcache_sb(sbi->sb); - - return; } -static void autofs4_put_super(struct super_block *sb) +void autofs4_kill_sb(struct super_block *sb) { struct autofs_sb_info *sbi = autofs4_sbi(sb); @@ -162,6 +156,7 @@ static void autofs4_put_super(struct sup kfree(sbi); DPRINTK("shutting down"); + kill_anon_super(sb); } static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) @@ -188,7 +183,6 @@ static int autofs4_show_options(struct s } static struct super_operations autofs4_sops = { - .put_super = autofs4_put_super, .statfs = simple_statfs, .show_options = autofs4_show_options, }; @@ -314,7 +308,6 @@ int autofs4_fill_super(struct super_bloc s->s_fs_info = sbi; sbi->magic = AUTOFS_SBI_MAGIC; - sbi->root = NULL; sbi->pipefd = -1; sbi->catatonic = 0; sbi->exp_timeout = 0; @@ -396,13 +389,6 @@ int autofs4_fill_super(struct super_bloc sbi->pipefd = pipefd; /* - * Take a reference to the root dentry so we get a chance to - * clean up the dentry tree on umount. - * See autofs4_force_release. - */ - sbi->root = dget(root); - - /* * Success! Install the root dentry now to indicate completion. */ s->s_root = root; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index ce103e7..c0a6c8d 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -45,7 +45,6 @@ void autofs4_catatonic_mode(struct autof fput(sbi->pipe); /* Close the pipe */ sbi->pipe = NULL; } - shrink_dcache_sb(sbi->sb); } static int autofs4_write(struct file *file, const void *addr, int bytes) diff --git a/fs/buffer.c b/fs/buffer.c index 3660dcb..31a01da 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -184,6 +184,8 @@ int fsync_super(struct super_block *sb) return sync_blockdev(sb->s_bdev); } +EXPORT_SYMBOL(fsync_super); + /* * Write out and wait upon all dirty data associated with this * device. Filesystem data as well as the underlying block diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile new file mode 100644 index 0000000..c1522d9 --- /dev/null +++ b/fs/cachefiles/Makefile @@ -0,0 +1,18 @@ +# +# Makefile for caching in a mounted filesystem +# + +cachefiles-objs := \ + cf-bind.o \ + cf-interface.o \ + cf-key.o \ + cf-main.o \ + cf-namei.o \ + cf-proc.o \ + cf-xattr.o + +ifeq ($(CONFIG_SYSCTL),y) +cachefiles-objs += cf-sysctl.o +endif + +obj-$(CONFIG_CACHEFILES) := cachefiles.o diff --git a/fs/cachefiles/cf-bind.c b/fs/cachefiles/cf-bind.c new file mode 100644 index 0000000..0c14c37 --- /dev/null +++ b/fs/cachefiles/cf-bind.c @@ -0,0 +1,283 @@ +/* cf-bind.c: bind and unbind a cache from the filesystem backing it + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +static int cachefiles_proc_add_cache(struct cachefiles_cache *cache, + struct vfsmount *mnt); + +/*****************************************************************************/ +/* + * bind a directory as a cache + */ +int cachefiles_proc_bind(struct cachefiles_cache *cache, char *args) +{ + _enter("{%u,%u,%u},%s", + cache->brun_percent, + cache->bcull_percent, + cache->bstop_percent, + args); + + /* start by checking things over */ + ASSERT(cache->bstop_percent >= 0 && + cache->bstop_percent < cache->bcull_percent && + cache->bcull_percent < cache->brun_percent && + cache->brun_percent < 100); + + if (*args) { + kerror("'bind' command doesn't take an argument"); + return -EINVAL; + } + + if (!cache->rootdirname) { + kerror("No cache directory specified"); + return -EINVAL; + } + + /* don't permit already bound caches to be re-bound */ + if (test_bit(CACHEFILES_READY, &cache->flags)) { + kerror("Cache already bound"); + return -EBUSY; + } + + /* make sure we have copies of the tag and dirname strings */ + if (!cache->tag) { + /* the tag string is released by the fops->release() + * function, so we don't release it on error here */ + cache->tag = kstrdup("CacheFiles", GFP_KERNEL); + if (!cache->tag) + return -ENOMEM; + } + + /* add the cache */ + return cachefiles_proc_add_cache(cache, NULL); + +} /* end cachefiles_proc_bind() */ + +/*****************************************************************************/ +/* + * add a cache + */ +static int cachefiles_proc_add_cache(struct cachefiles_cache *cache, + struct vfsmount *mnt) +{ + struct cachefiles_object *fsdef; + struct nameidata nd; + struct kstatfs stats; + struct dentry *graveyard, *cachedir, *root; + int ret; + + _enter(""); + + /* allocate the root index object */ + ret = -ENOMEM; + + fsdef = kmem_cache_alloc(cachefiles_object_jar, SLAB_KERNEL); + if (!fsdef) + goto error_root_object; + + atomic_set(&fsdef->usage, 1); + atomic_set(&fsdef->fscache_usage, 1); + fsdef->type = FSCACHE_COOKIE_TYPE_INDEX; + + _debug("- fsdef %p", fsdef); + + /* look up the directory at the root of the cache */ + memset(&nd, 0, sizeof(nd)); + + ret = path_lookup(cache->rootdirname, LOOKUP_DIRECTORY, &nd); + if (ret < 0) + goto error_open_root; + + /* bind to the special mountpoint we've prepared */ + if (mnt) { + atomic_inc(&nd.mnt->mnt_sb->s_active); + mnt->mnt_sb = nd.mnt->mnt_sb; + mnt->mnt_flags = nd.mnt->mnt_flags; + mnt->mnt_flags |= MNT_NOSUID | MNT_NOEXEC | MNT_NODEV; + mnt->mnt_root = dget(nd.dentry); + mnt->mnt_mountpoint = mnt->mnt_root; + + /* copy the name, but ignore kstrdup() failing ENOMEM - we'll + * just end up with an devicenameless mountpoint */ + mnt->mnt_devname = kstrdup(nd.mnt->mnt_devname, GFP_KERNEL); + path_release(&nd); + + cache->mnt = mntget(mnt); + root = dget(mnt->mnt_root); + } + else { + cache->mnt = nd.mnt; + root = nd.dentry; + + nd.mnt = NULL; + nd.dentry = NULL; + path_release(&nd); + } + + /* check parameters */ + ret = -EOPNOTSUPP; + if (!root->d_inode || + !root->d_inode->i_op || + !root->d_inode->i_op->lookup || + !root->d_inode->i_op->mkdir || + 
!root->d_inode->i_op->setxattr || + !root->d_inode->i_op->getxattr || + !root->d_sb || + !root->d_sb->s_op || + !root->d_sb->s_op->statfs || + !root->d_sb->s_op->sync_fs) + goto error_unsupported; + + ret = -EROFS; + if (root->d_sb->s_flags & MS_RDONLY) + goto error_unsupported; + + /* get the cache size and blocksize */ + ret = root->d_sb->s_op->statfs(root, &stats); + if (ret < 0) + goto error_unsupported; + + ret = -ERANGE; + if (stats.f_bsize <= 0) + goto error_unsupported; + + ret = -EOPNOTSUPP; + if (stats.f_bsize > PAGE_SIZE) + goto error_unsupported; + + cache->bsize = stats.f_bsize; + cache->bshift = 0; + if (stats.f_bsize < PAGE_SIZE) + cache->bshift = PAGE_SHIFT - long_log2(stats.f_bsize); + + _debug("blksize %u (shift %u)", + cache->bsize, cache->bshift); + + _debug("size %llu, avail %llu", stats.f_blocks, stats.f_bavail); + + /* set up caching limits */ + stats.f_blocks >>= cache->bshift; + do_div(stats.f_blocks, 100); + cache->bstop = stats.f_blocks * cache->bstop_percent; + cache->bcull = stats.f_blocks * cache->bcull_percent; + cache->brun = stats.f_blocks * cache->brun_percent; + + _debug("limits {%llu,%llu,%llu}", + cache->brun, + cache->bcull, + cache->bstop); + + /* get the cache directory and check its type */ + cachedir = cachefiles_get_directory(cache, root, "cache"); + if (IS_ERR(cachedir)) { + ret = PTR_ERR(cachedir); + goto error_unsupported; + } + + fsdef->dentry = cachedir; + + ret = cachefiles_check_object_type(fsdef); + if (ret < 0) + goto error_unsupported; + + /* get the graveyard directory */ + graveyard = cachefiles_get_directory(cache, root, "graveyard"); + if (IS_ERR(graveyard)) { + ret = PTR_ERR(graveyard); + goto error_unsupported; + } + + cache->graveyard = graveyard; + + /* publish the cache */ + fscache_init_cache(&cache->cache, + &cachefiles_cache_ops, + "%02x:%02x", + MAJOR(fsdef->dentry->d_sb->s_dev), + MINOR(fsdef->dentry->d_sb->s_dev) + ); + + ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag); + if (ret < 0) + goto error_add_cache; + + /* done */ + set_bit(CACHEFILES_READY, &cache->flags); + dput(root); + + printk(KERN_INFO "CacheFiles:" + " File cache on %s registered\n", + cache->cache.identifier); + + /* check how much space the cache has */ + cachefiles_has_space(cache, 0); + + return 0; + +error_add_cache: + dput(cache->graveyard); + cache->graveyard = NULL; +error_unsupported: + mntput(cache->mnt); + cache->mnt = NULL; + dput(fsdef->dentry); + fsdef->dentry = NULL; + dput(root); +error_open_root: + kmem_cache_free(cachefiles_object_jar, fsdef); +error_root_object: + kerror("Failed to register: %d", ret); + return ret; + +} /* end cachefiles_proc_add_cache() */ + +/*****************************************************************************/ +/* + * unbind a cache on fd release + */ +void cachefiles_proc_unbind(struct cachefiles_cache *cache) +{ + _enter(""); + + if (test_bit(CACHEFILES_READY, &cache->flags)) { + printk(KERN_INFO "CacheFiles:" + " File cache on %s unregistering\n", + cache->cache.identifier); + + fscache_withdraw_cache(&cache->cache); + } + + if (cache->cache.fsdef) + cache->cache.ops->put_object(cache->cache.fsdef); + + dput(cache->graveyard); + mntput(cache->mnt); + + kfree(cache->rootdirname); + kfree(cache->tag); + + _leave(""); + +} /* end cachefiles_proc_unbind() */ diff --git a/fs/cachefiles/cf-interface.c b/fs/cachefiles/cf-interface.c new file mode 100644 index 0000000..32ad844 --- /dev/null +++ b/fs/cachefiles/cf-interface.c @@ -0,0 +1,1315 @@ +/* cf-interface.c: CacheFiles to FS-Cache 
interface + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +#define list_to_page(head) (list_entry((head)->prev, struct page, lru)) +#define log2(n) ffz(~(n)) + +/*****************************************************************************/ +/* + * look up the nominated node in this cache, creating it if necessary + */ +static struct fscache_object *cachefiles_lookup_object( + struct fscache_cache *_cache, + struct fscache_object *_parent, + struct fscache_cookie *cookie) +{ + struct cachefiles_object *parent, *object; + struct cachefiles_cache *cache; + struct cachefiles_xattr *auxdata; + unsigned keylen, auxlen; + void *buffer; + char *key; + int ret; + + ASSERT(_parent); + + cache = container_of(_cache, struct cachefiles_cache, cache); + parent = container_of(_parent, struct cachefiles_object, fscache); + + _enter("{%s},%p,%p", cache->cache.identifier, parent, cookie); + + /* create a new object record and a temporary leaf image */ + object = kmem_cache_alloc(cachefiles_object_jar, SLAB_KERNEL); + if (!object) + goto nomem_object; + + atomic_set(&object->usage, 1); + atomic_set(&object->fscache_usage, 1); + + fscache_object_init(&object->fscache); + object->fscache.cookie = cookie; + object->fscache.cache = parent->fscache.cache; + + object->type = cookie->def->type; + + /* get hold of the raw key + * - stick the length on the front and leave space on the back for the + * encoder + */ + buffer = kmalloc((2 + 512) + 3, GFP_KERNEL); + if (!buffer) + goto nomem_buffer; + + keylen = cookie->def->get_key(cookie->netfs_data, buffer + 2, 512); + ASSERTCMP(keylen, <, 512); + + *(uint16_t *)buffer = keylen; + ((char *)buffer)[keylen + 2] = 0; + ((char *)buffer)[keylen + 3] = 0; + ((char *)buffer)[keylen + 4] = 0; + + /* turn the raw key into something that can work with as a filename */ + key = cachefiles_cook_key(buffer, keylen + 2, object->type); + if (!key) + goto nomem_key; + + /* get hold of the auxiliary data and prepend the object type */ + auxdata = buffer; + auxlen = 0; + if (cookie->def->get_aux) { + auxlen = cookie->def->get_aux(cookie->netfs_data, + auxdata->data, 511); + ASSERTCMP(auxlen, <, 511); + } + + auxdata->len = auxlen + 1; + auxdata->type = cookie->def->type; + + /* look up the key, creating any missing bits */ + ret = cachefiles_walk_to_object(parent, object, key, auxdata); + if (ret < 0) + goto lookup_failed; + + kfree(buffer); + kfree(key); + _leave(" = %p", &object->fscache); + return &object->fscache; + +lookup_failed: + kmem_cache_free(cachefiles_object_jar, object); + kfree(buffer); + kfree(key); + _leave(" = %d", ret); + return ERR_PTR(ret); + +nomem_key: + kfree(buffer); +nomem_buffer: + kmem_cache_free(cachefiles_object_jar, object); +nomem_object: + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); + +} /* end cachefiles_lookup_object() */ + +/*****************************************************************************/ +/* + * increment the usage count on an inode object (may fail if unmounting) + */ +static struct fscache_object *cachefiles_grab_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + + _enter("%p", 
_object); + + object = container_of(_object, struct cachefiles_object, fscache); + +#ifdef CACHEFILES_DEBUG_SLAB + ASSERT((atomic_read(&object->fscache_usage) & 0xffff0000) != 0x6b6b0000); +#endif + + atomic_inc(&object->fscache_usage); + return &object->fscache; + +} /* end cachefiles_grab_object() */ + +/*****************************************************************************/ +/* + * lock the semaphore on an object object + */ +static void cachefiles_lock_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + + _enter("%p", _object); + + object = container_of(_object, struct cachefiles_object, fscache); + +#ifdef CACHEFILES_DEBUG_SLAB + ASSERT((atomic_read(&object->fscache_usage) & 0xffff0000) != 0x6b6b0000); +#endif + + down_write(&object->sem); + +} /* end cachefiles_lock_object() */ + +/*****************************************************************************/ +/* + * unlock the semaphore on an object object + */ +static void cachefiles_unlock_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + + _enter("%p", _object); + + object = container_of(_object, struct cachefiles_object, fscache); + up_write(&object->sem); + +} /* end cachefiles_unlock_object() */ + +/*****************************************************************************/ +/* + * update the auxilliary data for an object object on disk + */ +static void cachefiles_update_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + + _enter("%p", _object); + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, struct cachefiles_cache, cache); + + //cachefiles_tree_update_object(super, object); + +} /* end cachefiles_update_object() */ + +/*****************************************************************************/ +/* + * dispose of a reference to an object object + */ +static void cachefiles_put_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + int ret; + + ASSERT(_object); + + object = container_of(_object, struct cachefiles_object, fscache); + _enter("%p{%d}", object, atomic_read(&object->usage)); + + ASSERT(object); + + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + +#ifdef CACHEFILES_DEBUG_SLAB + ASSERT((atomic_read(&object->fscache_usage) & 0xffff0000) != 0x6b6b0000); +#endif + + if (!atomic_dec_and_test(&object->fscache_usage)) + return; + + _debug("- kill object %p", object); + + /* delete retired objects */ + if (test_bit(FSCACHE_OBJECT_RECYCLING, &object->fscache.flags) && + _object != cache->cache.fsdef + ) { + _debug("- retire object %p", object); + cachefiles_delete_object(cache, object); + } + + /* close the filesystem stuff attached to the object */ + if (object->backer) { + if (object->backer->f_op && + object->backer->f_op->flush + ) { + ret = object->backer->f_op->flush(object->backer, + NULL); + if (ret < 0) + kerror("Backing file flush returned error %d", + ret); + } + fput(object->backer); + object->backer = NULL; + } + + /* note that an object is now inactive */ + write_lock(&cache->active_lock); + rb_erase(&object->active_node, &cache->active_nodes); + write_unlock(&cache->active_lock); + + dput(object->dentry); + object->dentry = NULL; + + /* then dispose of the object */ + kmem_cache_free(cachefiles_object_jar, object); + + _leave(""); + +} /* end cachefiles_put_object() */ + 
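[Editorial aside, not part of the patch: the object operations above are reference-counted and lock-protected. The fragment below is a minimal sketch of how the FS-Cache core might drive them, pairing grab_object() with put_object() and bracketing update_object() with lock_object()/unlock_object(). The "ops" field name on struct fscache_cache is an assumption taken from fscache-cache.h conventions and is not shown in this file.]

	/*
	 * Illustrative sketch only (not part of this patch): roughly how the
	 * FS-Cache core is expected to pair the object operations defined
	 * above.  The cache->ops field name is an assumption; it is declared
	 * in fscache-cache.h, not in this file.
	 */
	static void example_update_cached_object(struct fscache_cache *cache,
						 struct fscache_object *object)
	{
		/* take an extra reference so the object can't vanish under us */
		object = cache->ops->grab_object(object);

		/* exclude other modifiers while the auxiliary data is rewritten */
		cache->ops->lock_object(object);
		cache->ops->update_object(object);
		cache->ops->unlock_object(object);

		/* drop the reference; the final put may retire the object */
		cache->ops->put_object(object);
	}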
+/*****************************************************************************/ +/* + * sync a cache + */ +static void cachefiles_sync_cache(struct fscache_cache *_cache) +{ + struct cachefiles_cache *cache; + int ret; + + _enter("%p", _cache); + + cache = container_of(_cache, struct cachefiles_cache, cache); + + /* make sure all pages pinned by operations on behalf of the netfs are + * written to disc */ + ret = fsync_super(cache->mnt->mnt_sb); + if (ret == -EIO) + cachefiles_io_error(cache, + "Attempt to sync backing fs superblock" + " returned error %d", + ret); + +} /* end cachefiles_sync_cache() */ + +/*****************************************************************************/ +/* + * set the data size on an object + */ +static int cachefiles_set_i_size(struct fscache_object *_object, loff_t i_size) +{ + struct cachefiles_object *object; + struct iattr newattrs; + int ret; + + _enter("%p,%llu", _object, i_size); + + object = container_of(_object, struct cachefiles_object, fscache); + + if (i_size == object->i_size) + return 0; + + if (!object->backer) + return -ENOBUFS; + + ASSERT(S_ISREG(object->backer->f_dentry->d_inode->i_mode)); + + newattrs.ia_size = i_size; + newattrs.ia_file = object->backer; + newattrs.ia_valid = ATTR_SIZE | ATTR_FILE; + + mutex_lock(&object->backer->f_dentry->d_inode->i_mutex); + ret = notify_change(object->backer->f_dentry, &newattrs); + mutex_unlock(&object->backer->f_dentry->d_inode->i_mutex); + + if (ret == -EIO) { + cachefiles_io_error_obj(object, "Size set failed"); + ret = -ENOBUFS; + } + + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_set_i_size() */ + +/*****************************************************************************/ +/* + * see if we have space for a number of pages in the cache + */ +int cachefiles_has_space(struct cachefiles_cache *cache, unsigned nr) +{ + struct kstatfs stats; + int ret; + + _enter("{%llu,%llu,%llu},%d", + cache->brun, cache->bcull, cache->bstop, nr); + + /* find out how many pages of blockdev are available */ + memset(&stats, 0, sizeof(stats)); + + ret = cache->mnt->mnt_sb->s_op->statfs(cache->mnt->mnt_root, &stats); + if (ret < 0) { + if (ret == -EIO) + cachefiles_io_error(cache, "statfs failed"); + return ret; + } + + stats.f_bavail >>= cache->bshift; + + _debug("avail %llu", stats.f_bavail); + + /* see if there is sufficient space */ + stats.f_bavail -= nr; + + ret = -ENOBUFS; + if (stats.f_bavail < cache->bstop) + goto begin_cull; + + ret = 0; + if (stats.f_bavail < cache->bcull) + goto begin_cull; + + if (test_bit(CACHEFILES_CULLING, &cache->flags) && + stats.f_bavail >= cache->brun + ) { + if (test_and_clear_bit(CACHEFILES_CULLING, &cache->flags)) { + _debug("cease culling"); + send_sigurg(&cache->cachefilesd->f_owner); + } + } + + _leave(" = 0"); + return 0; + +begin_cull: + if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) { + _debug("### CULL CACHE ###"); + send_sigurg(&cache->cachefilesd->f_owner); + } + + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_has_space() */ + +/*****************************************************************************/ +/* + * waiting reading backing files + */ +static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode, + int sync, void *_key) +{ + struct cachefiles_one_read *monitor = + container_of(wait, struct cachefiles_one_read, monitor); + struct wait_bit_key *key = _key; + struct page *page = wait->private; + + ASSERT(key); + + _enter("{%lu},%u,%d,{%p,%u}", + monitor->netfs_page->index, mode, sync, + key->flags, 
key->bit_nr); + + if (key->flags != &page->flags || + key->bit_nr != PG_locked) + return 0; + + _debug("--- monitor %p %lx ---", page, page->flags); + + if (!PageUptodate(page) && !PageError(page)) + dump_stack(); + + /* remove from the waitqueue */ + list_del(&wait->task_list); + + /* move onto the action list and queue for keventd */ + ASSERT(monitor->object); + + spin_lock(&monitor->object->work_lock); + list_move(&monitor->obj_link, &monitor->object->read_list); + spin_unlock(&monitor->object->work_lock); + + schedule_work(&monitor->object->read_work); + + return 0; + +} /* end cachefiles_read_waiter() */ + +/*****************************************************************************/ +/* + * let keventd drive the copying of pages + */ +void cachefiles_read_copier_work(void *_object) +{ + struct cachefiles_one_read *monitor; + struct cachefiles_object *object = _object; + struct fscache_cookie *cookie = object->fscache.cookie; + struct pagevec pagevec; + int error, max; + + _enter("{ino=%lu}", object->backer->f_dentry->d_inode->i_ino); + + pagevec_init(&pagevec, 0); + + max = 8; + spin_lock_irq(&object->work_lock); + + while (!list_empty(&object->read_list)) { + monitor = list_entry(object->read_list.next, + struct cachefiles_one_read, obj_link); + list_del(&monitor->obj_link); + + spin_unlock_irq(&object->work_lock); + + _debug("- copy {%lu}", monitor->back_page->index); + + error = -EIO; + if (PageUptodate(monitor->back_page)) { + copy_highpage(monitor->netfs_page, monitor->back_page); + + pagevec_add(&pagevec, monitor->netfs_page); + cookie->def->mark_pages_cached( + cookie->netfs_data, + monitor->netfs_page->mapping, + &pagevec); + pagevec_reinit(&pagevec); + + error = 0; + } + + if (error) + cachefiles_io_error_obj( + object, + "readpage failed on backing file %lx", + (unsigned long) monitor->back_page->flags); + + page_cache_release(monitor->back_page); + + monitor->end_io_func(monitor->netfs_page, + monitor->context, + error); + + page_cache_release(monitor->netfs_page); + fscache_put_context(cookie, monitor->context); + kfree(monitor); + + /* let keventd have some air occasionally */ + max--; + if (max < 0 || need_resched()) { + if (!list_empty(&object->read_list)) + schedule_work(&object->read_work); + _leave(" [maxed out]"); + return; + } + + spin_lock_irq(&object->work_lock); + } + + spin_unlock_irq(&object->work_lock); + + _leave(""); + +} /* end cachefiles_read_copier_work() */ + +/*****************************************************************************/ +/* + * read the corresponding page to the given set from the backing file + * - an uncertain page is simply discarded, to be tried again another time + */ +static int cachefiles_read_backing_file_one(struct cachefiles_object *object, + fscache_rw_complete_t end_io_func, + void *context, + struct page *netpage, + struct pagevec *lru_pvec) +{ + struct cachefiles_one_read *monitor; + struct address_space *bmapping; + struct page *newpage, *backpage; + int ret; + + _enter(""); + + ASSERTCMP(pagevec_count(lru_pvec), ==, 0); + pagevec_reinit(lru_pvec); + + _debug("read back %p{%lu,%d}", + netpage, netpage->index, page_count(netpage)); + + monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); + if (!monitor) + goto nomem; + + monitor->netfs_page = netpage; + monitor->object = object; + monitor->end_io_func = end_io_func; + monitor->context = fscache_get_context(object->fscache.cookie, + context); + + init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter); + + /* attempt to get hold of the backing page */ + 
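/* Probe the backing inode's pagecache for a page at the netfs page's
+	 * index.  If no page is there, allocate a cold page and try to insert
+	 * it; -EEXIST from add_to_page_cache() means another task installed a
+	 * page concurrently, in which case the lookup is simply retried. */
+	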
bmapping = object->backer->f_mapping; + newpage = NULL; + + for (;;) { + backpage = find_get_page(bmapping, netpage->index); + if (backpage) + goto backing_page_already_present; + + if (!newpage) { + newpage = page_cache_alloc_cold(bmapping); + if (!newpage) + goto nomem_monitor; + } + + ret = add_to_page_cache(newpage, bmapping, + netpage->index, GFP_KERNEL); + if (ret == 0) + goto installed_new_backing_page; + if (ret != -EEXIST) + goto nomem_page; + } + + /* we've installed a new backing page, so now we need to add it + * to the LRU list and start it reading */ +installed_new_backing_page: + _debug("- new %p", newpage); + + backpage = newpage; + newpage = NULL; + + page_cache_get(backpage); + pagevec_add(lru_pvec, backpage); + __pagevec_lru_add(lru_pvec); + + ret = bmapping->a_ops->readpage(object->backer, backpage); + if (ret < 0) + goto read_error; + + /* set the monitor to transfer the data across */ +monitor_backing_page: + _debug("- monitor add"); + + /* install the monitor */ + page_cache_get(monitor->netfs_page); + page_cache_get(backpage); + monitor->back_page = backpage; + + spin_lock_irq(&object->work_lock); + list_add_tail(&monitor->obj_link, &object->read_pend_list); + spin_unlock_irq(&object->work_lock); + + monitor->monitor.private = backpage; + install_page_waitqueue_monitor(backpage, &monitor->monitor); + monitor = NULL; + + /* but the page may have been read before the monitor was + * installed, so the monitor may miss the event - so we have to + * ensure that we do get one in such a case */ + if (!TestSetPageLocked(backpage)) + unlock_page(backpage); + goto success; + + /* if the backing page is already present, it can be in one of + * three states: read in progress, read failed or read okay */ +backing_page_already_present: + _debug("- present"); + + if (newpage) { + page_cache_release(newpage); + newpage = NULL; + } + + if (PageError(backpage)) + goto io_error; + + if (PageUptodate(backpage)) + goto backing_page_already_uptodate; + + goto monitor_backing_page; + + /* the backing page is already up to date, attach the netfs + * page to the pagecache and LRU and copy the data across */ +backing_page_already_uptodate: + _debug("- uptodate"); + + copy_highpage(netpage, backpage); + end_io_func(netpage, context, 0); + +success: + _debug("success"); + ret = 0; + +out: + if (backpage) + page_cache_release(backpage); + if (monitor) { + fscache_put_context(object->fscache.cookie, monitor->context); + kfree(monitor); + } + + _leave(" = %d", ret); + return ret; + +read_error: + _debug("read error %d", ret); + if (ret == -ENOMEM) + goto out; +io_error: + cachefiles_io_error_obj(object, "page read error on backing file"); + ret = -EIO; + goto out; + +nomem_page: + page_cache_release(newpage); +nomem_monitor: + fscache_put_context(object->fscache.cookie, monitor->context); + kfree(monitor); +nomem: + _leave(" = -ENOMEM"); + return -ENOMEM; + +} /* end cachefiles_read_backing_file_one() */ + +/*****************************************************************************/ +/* + * read a page from the cache or allocate a block in which to store it + * - cache withdrawal is prevented by the caller + * - returns -EINTR if interrupted + * - returns -ENOMEM if ran out of memory + * - returns -ENOBUFS if no buffers can be made available + * - returns -ENOBUFS if page is beyond EOF + * - if the page is backed by a block in the cache: + * - a read will be started which will call the callback on completion + * - 0 will be returned + * - else if the page is unbacked: + * - the metadata will 
be retained + * - -ENODATA will be returned + */ +static int cachefiles_read_or_alloc_page(struct fscache_object *_object, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + unsigned long gfp) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct fscache_cookie *cookie; + struct pagevec pagevec; + struct inode *inode; + sector_t block0, block; + unsigned shift; + int ret; + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, struct cachefiles_cache, cache); + + _enter("{%p},{%lx},,,", object, page->index); + + if (!object->backer) + return -ENOBUFS; + + inode = object->backer->f_dentry->d_inode; + ASSERT(S_ISREG(inode->i_mode)); + ASSERT(inode->i_mapping->a_ops->bmap); + ASSERT(inode->i_mapping->a_ops->readpages); + + /* calculate the shift required to use bmap */ + if (inode->i_sb->s_blocksize > PAGE_SIZE) + return -ENOBUFS; + + shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; + + cookie = object->fscache.cookie; + + pagevec_init(&pagevec, 0); + + /* we assume the absence or presence of the first block is a good + * enough indication for the page as a whole + * - TODO: don't use bmap() for this as it is _not_ actually good + * enough for this as it doesn't indicate errors, but it's all we've + * got for the moment + */ + block0 = page->index; + block0 <<= shift; + + block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0); + _debug("%llx -> %llx", block0, block); + + if (block) { + /* submit the apparently valid page to the backing fs to be + * read from disk */ + ret = cachefiles_read_backing_file_one(object, + end_io_func, + context, + page, + &pagevec); + ret = 0; + } + else if (cachefiles_has_space(cache, 1) == 0) { + /* there's space in the cache we can use */ + pagevec_add(&pagevec, page); + cookie->def->mark_pages_cached(cookie->netfs_data, + page->mapping, &pagevec); + ret = -ENODATA; + } + else { + ret = -ENOBUFS; + } + + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_read_or_alloc_page() */ + +/*****************************************************************************/ +/* + * read the corresponding pages to the given set from the backing file + * - any uncertain pages are simply discarded, to be tried again another time + */ +static int cachefiles_read_backing_file(struct cachefiles_object *object, + fscache_rw_complete_t end_io_func, + void *context, + struct address_space *mapping, + struct list_head *list, + struct pagevec *lru_pvec) +{ + struct cachefiles_one_read *monitor = NULL; + struct address_space *bmapping = object->backer->f_mapping; + struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; + int ret = 0; + + _enter(""); + + ASSERTCMP(pagevec_count(lru_pvec), ==, 0); + pagevec_reinit(lru_pvec); + + list_for_each_entry_safe(netpage, _n, list, lru) { + list_del(&netpage->lru); + + _debug("read back %p{%lu,%d}", + netpage, netpage->index, page_count(netpage)); + + if (!monitor) { + monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); + if (!monitor) + goto nomem; + + monitor->object = object; + monitor->end_io_func = end_io_func; + monitor->context = fscache_get_context( + object->fscache.cookie, context); + + init_waitqueue_func_entry(&monitor->monitor, + cachefiles_read_waiter); + } + + for (;;) { + backpage = find_get_page(bmapping, netpage->index); + if (backpage) + goto backing_page_already_present; + + if (!newpage) { + newpage = page_cache_alloc_cold(bmapping); + if (!newpage) + goto nomem; + } + + ret = 
add_to_page_cache(newpage, bmapping, + netpage->index, GFP_KERNEL); + if (ret == 0) + goto installed_new_backing_page; + if (ret != -EEXIST) + goto nomem; + } + + /* we've installed a new backing page, so now we need to add it + * to the LRU list and start it reading */ + installed_new_backing_page: + _debug("- new %p", newpage); + + backpage = newpage; + newpage = NULL; + + page_cache_get(backpage); + if (!pagevec_add(lru_pvec, backpage)) + __pagevec_lru_add(lru_pvec); + + reread_backing_page: + ret = bmapping->a_ops->readpage(object->backer, backpage); + if (ret < 0) + goto read_error; + + /* add the netfs page to the pagecache and LRU, and set the + * monitor to transfer the data across */ + monitor_backing_page: + _debug("- monitor add"); + + ret = add_to_page_cache(netpage, mapping, netpage->index, + GFP_KERNEL); + if (ret < 0) { + if (ret == -EEXIST) { + page_cache_release(netpage); + continue; + } + goto nomem; + } + + page_cache_get(netpage); + if (!pagevec_add(lru_pvec, netpage)) + __pagevec_lru_add(lru_pvec); + + /* install a monitor */ + page_cache_get(netpage); + monitor->netfs_page = netpage; + + page_cache_get(backpage); + monitor->back_page = backpage; + + spin_lock_irq(&object->work_lock); + list_add_tail(&monitor->obj_link, &object->read_pend_list); + spin_unlock_irq(&object->work_lock); + + monitor->monitor.private = backpage; + install_page_waitqueue_monitor(backpage, &monitor->monitor); + monitor = NULL; + + /* but the page may have been read before the monitor was + * installed, so the monitor may miss the event - so we have to + * ensure that we do get one in such a case */ + if (!TestSetPageLocked(backpage)) { + _debug("2unlock %p", backpage); + unlock_page(backpage); + } + + page_cache_release(backpage); + backpage = NULL; + + page_cache_release(netpage); + netpage = NULL; + continue; + + /* if the backing page is already present, it can be in one of + * three states: read in progress, read failed or read okay */ + backing_page_already_present: + _debug("- present %p", backpage); + + if (PageError(backpage)) + goto io_error; + + if (PageUptodate(backpage)) + goto backing_page_already_uptodate; + + _debug("- not ready %p{%lx}", backpage, backpage->flags); + + if (TestSetPageLocked(backpage)) + goto monitor_backing_page; + + if (PageError(backpage)) { + unlock_page(backpage); + goto io_error; + } + + if (PageUptodate(backpage)) + goto backing_page_already_uptodate_unlock; + + /* we've locked a page that's neither up to date nor erroneous, + * so we need to attempt to read it again */ + goto reread_backing_page; + + /* the backing page is already up to date, attach the netfs + * page to the pagecache and LRU and copy the data across */ + backing_page_already_uptodate_unlock: + unlock_page(backpage); + backing_page_already_uptodate: + _debug("- uptodate"); + + ret = add_to_page_cache(netpage, mapping, netpage->index, + GFP_KERNEL); + if (ret < 0) { + if (ret == -EEXIST) { + page_cache_release(netpage); + continue; + } + goto nomem; + } + + copy_highpage(netpage, backpage); + + page_cache_release(backpage); + backpage = NULL; + + page_cache_get(netpage); + if (!pagevec_add(lru_pvec, netpage)) + __pagevec_lru_add(lru_pvec); + + end_io_func(netpage, context, 0); + + page_cache_release(netpage); + netpage = NULL; + continue; + } + + netpage = NULL; + + _debug("out"); + +out: + /* tidy up */ + pagevec_lru_add(lru_pvec); + + if (newpage) + page_cache_release(newpage); + if (netpage) + page_cache_release(netpage); + if (backpage) + page_cache_release(backpage); + if (monitor) 
{ + fscache_put_context(object->fscache.cookie, monitor->context); + kfree(monitor); + } + + list_for_each_entry_safe(netpage, _n, list, lru) { + list_del(&netpage->lru); + page_cache_release(netpage); + } + + _leave(" = %d", ret); + return ret; + +nomem: + _debug("nomem"); + ret = -ENOMEM; + goto out; + +read_error: + _debug("read error %d", ret); + if (ret == -ENOMEM) + goto out; +io_error: + cachefiles_io_error_obj(object, "page read error on backing file"); + ret = -EIO; + goto out; + +} /* end cachefiles_read_backing_file() */ + +/*****************************************************************************/ +/* + * read a list of pages from the cache or allocate blocks in which to store + * them + */ +static int cachefiles_read_or_alloc_pages(struct fscache_object *_object, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t end_io_func, + void *context, + unsigned long gfp) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct fscache_cookie *cookie; + struct list_head backpages; + struct pagevec pagevec; + struct inode *inode; + struct page *page, *_n; + unsigned shift, nrbackpages; + int ret, ret2, space; + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, struct cachefiles_cache, cache); + + _enter("{%p},,%d,,", object, *nr_pages); + + if (!object->backer) + return -ENOBUFS; + + space = 1; + if (cachefiles_has_space(cache, *nr_pages) < 0) + space = 0; + + inode = object->backer->f_dentry->d_inode; + ASSERT(S_ISREG(inode->i_mode)); + ASSERT(inode->i_mapping->a_ops->bmap); + ASSERT(inode->i_mapping->a_ops->readpages); + + /* calculate the shift required to use bmap */ + if (inode->i_sb->s_blocksize > PAGE_SIZE) + return -ENOBUFS; + + shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; + + pagevec_init(&pagevec, 0); + + cookie = object->fscache.cookie; + + INIT_LIST_HEAD(&backpages); + nrbackpages = 0; + + ret = space ? -ENODATA : -ENOBUFS; + list_for_each_entry_safe(page, _n, pages, lru) { + sector_t block0, block; + + /* we assume the absence or presence of the first block is a + * good enough indication for the page as a whole + * - TODO: don't use bmap() for this as it is _not_ actually + * good enough for this as it doesn't indicate errors, but + * it's all we've got for the moment + */ + block0 = page->index; + block0 <<= shift; + + block = inode->i_mapping->a_ops->bmap(inode->i_mapping, + block0); + _debug("%llx -> %llx", block0, block); + + if (block) { + /* we have data - add it to the list to give to the + * backing fs */ + list_move(&page->lru, &backpages); + (*nr_pages)--; + nrbackpages++; + } + else if (space && pagevec_add(&pagevec, page) == 0) { + cookie->def->mark_pages_cached(cookie->netfs_data, + mapping, &pagevec); + pagevec_reinit(&pagevec); + ret = -ENODATA; + } + } + + if (pagevec_count(&pagevec) > 0) { + cookie->def->mark_pages_cached(cookie->netfs_data, + mapping, &pagevec); + pagevec_reinit(&pagevec); + } + + if (list_empty(pages)) + ret = 0; + + /* submit the apparently valid pages to the backing fs to be read from disk */ + if (nrbackpages > 0) { + ret2 = cachefiles_read_backing_file(object, + end_io_func, + context, + mapping, + &backpages, + &pagevec); + + ASSERTCMP(pagevec_count(&pagevec), ==, 0); + + if (ret2 == -ENOMEM || ret2 == -EINTR) + ret = ret2; + } + + _leave(" = %d [nr=%u%s]", + ret, *nr_pages, list_empty(pages) ? 
" empty" : ""); + return ret; + +} /* end cachefiles_read_or_alloc_pages() */ + +/*****************************************************************************/ +/* + * read a page from the cache or allocate a block in which to store it + * - cache withdrawal is prevented by the caller + * - returns -EINTR if interrupted + * - returns -ENOMEM if ran out of memory + * - returns -ENOBUFS if no buffers can be made available + * - returns -ENOBUFS if page is beyond EOF + * - otherwise: + * - the metadata will be retained + * - 0 will be returned + */ +static int cachefiles_allocate_page(struct fscache_object *_object, + struct page *page, + unsigned long gfp) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("%p,{%lx},,,", object, page->index); + + return cachefiles_has_space(cache, 1); + +} /* end cachefiles_allocate_page() */ + +/*****************************************************************************/ +/* + * page storer + */ +void cachefiles_write_work(void *_object) +{ + struct cachefiles_one_write *writer; + struct cachefiles_object *object = _object; + int ret, max; + + _enter("%p", object); + + ASSERT(!irqs_disabled()); + + spin_lock_irq(&object->work_lock); + max = 8; + + while (!list_empty(&object->write_list)) { + writer = list_entry(object->write_list.next, + struct cachefiles_one_write, obj_link); + list_del(&writer->obj_link); + + spin_unlock_irq(&object->work_lock); + + _debug("- store {%lu}", writer->netfs_page->index); + + ret = generic_file_buffered_write_one_kernel_page( + object->backer, + writer->netfs_page->index, + writer->netfs_page); + + if (ret == -ENOSPC) { + ret = -ENOBUFS; + } + else if (ret == -EIO) { + cachefiles_io_error_obj(object, + "write page to backing file" + " failed"); + ret = -ENOBUFS; + } + + _debug("- callback"); + writer->end_io_func(writer->netfs_page, + writer->context, + ret); + + _debug("- put net"); + page_cache_release(writer->netfs_page); + fscache_put_context(object->fscache.cookie, writer->context); + kfree(writer); + + /* let keventd have some air occasionally */ + max--; + if (max < 0 || need_resched()) { + if (!list_empty(&object->write_list)) + schedule_work(&object->write_work); + _leave(" [maxed out]"); + return; + } + + _debug("- next"); + spin_lock_irq(&object->work_lock); + } + + spin_unlock_irq(&object->work_lock); + _leave(""); + +} /* end cachefiles_write_work() */ + +/*****************************************************************************/ +/* + * request a page be stored in the cache + * - cache withdrawal is prevented by the caller + * - this request may be ignored if there's no cache block available, in which + * case -ENOBUFS will be returned + * - if the op is in progress, 0 will be returned + */ +static int cachefiles_write_page(struct fscache_object *_object, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + unsigned long gfp) +{ +// struct cachefiles_one_write *writer; + struct cachefiles_object *object; + int ret; + + object = container_of(_object, struct cachefiles_object, fscache); + + _enter("%p,%p{%lx},,,", object, page, page->index); + + if (!object->backer) + return -ENOBUFS; + + ASSERT(S_ISREG(object->backer->f_dentry->d_inode->i_mode)); + +#if 0 // set to 1 for deferred writing + /* queue the operation for deferred processing by keventd */ + writer = kzalloc(sizeof(*writer), GFP_KERNEL); + if 
(!writer) + return -ENOMEM; + + page_cache_get(page); + writer->netfs_page = page; + writer->object = object; + writer->end_io_func = end_io_func; + writer->context = facache_get_context(object->fscache.cookie, context); + + spin_lock_irq(&object->work_lock); + list_add_tail(&writer->obj_link, &object->write_list); + spin_unlock_irq(&object->work_lock); + + schedule_work(&object->write_work); + ret = 0; + +#else + /* copy the page to ext3 and let it store it in its own time */ + ret = generic_file_buffered_write_one_kernel_page(object->backer, + page->index, + page); + + if (ret != 0) { + if (ret == -EIO) + cachefiles_io_error_obj(object, + "write page to backing file" + " failed"); + ret = -ENOBUFS; + } + + end_io_func(page, context, ret); +#endif + + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_write_page() */ + +/*****************************************************************************/ +/* + * detach a backing block from a page + * - cache withdrawal is prevented by the caller + */ +static void cachefiles_uncache_pages(struct fscache_object *_object, + struct pagevec *pagevec) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("%p,{%lu,%lx},,,", + object, pagevec->nr, pagevec->pages[0]->index); + +} /* end cachefiles_uncache_pages() */ + +/*****************************************************************************/ +/* + * dissociate a cache from all the pages it was backing + */ +static void cachefiles_dissociate_pages(struct fscache_cache *cache) +{ + _enter(""); + +} /* end cachefiles_dissociate_pages() */ + +struct fscache_cache_ops cachefiles_cache_ops = { + .name = "cachefiles", + .lookup_object = cachefiles_lookup_object, + .grab_object = cachefiles_grab_object, + .lock_object = cachefiles_lock_object, + .unlock_object = cachefiles_unlock_object, + .update_object = cachefiles_update_object, + .put_object = cachefiles_put_object, + .sync_cache = cachefiles_sync_cache, + .set_i_size = cachefiles_set_i_size, + .read_or_alloc_page = cachefiles_read_or_alloc_page, + .read_or_alloc_pages = cachefiles_read_or_alloc_pages, + .allocate_page = cachefiles_allocate_page, + .write_page = cachefiles_write_page, + .uncache_pages = cachefiles_uncache_pages, + .dissociate_pages = cachefiles_dissociate_pages, +}; diff --git a/fs/cachefiles/cf-key.c b/fs/cachefiles/cf-key.c new file mode 100644 index 0000000..86d7fe7 --- /dev/null +++ b/fs/cachefiles/cf-key.c @@ -0,0 +1,160 @@ +/* cf-key.c: Key to pathname encoder + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include "internal.h" + +static const char cachefiles_charmap[64] = + "0123456789" /* 0 - 9 */ + "abcdefghijklmnopqrstuvwxyz" /* 10 - 35 */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* 36 - 61 */ + "_-" /* 62 - 63 */ + ; + +static const char cachefiles_filecharmap[256] = { + /* we skip space and tab and control chars */ + [ 33 ... 46 ] = 1, /* '!' -> '.' */ + /* we skip '/' as it's significant to pathwalk */ + [ 48 ... 
127 ] = 1, /* '0' -> '~' */ +}; + +/*****************************************************************************/ +/* + * turn the raw key into something cooked + * - the raw key should include the length in the two bytes at the front + * - the key may be up to 514 bytes in length (including the length word) + * - "base64" encode the strange keys, mapping 3 bytes of raw to four of + * cooked + * - need to cut the cooked key into 252 char lengths (189 raw bytes) + */ +char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type) +{ + unsigned char csum, ch; + unsigned int acc; + char *key; + int loop, len, max, seg, mark, print; + + _enter(",%d", keylen); + + BUG_ON(keylen < 2 || keylen > 514); + + csum = raw[0] + raw[1]; + print = 1; + for (loop = 2; loop < keylen; loop++) { + ch = raw[loop]; + csum += ch; + print &= cachefiles_filecharmap[ch]; + } + + if (print) { + /* if the path is usable ASCII, then we render it directly */ + max = keylen - 2; + max += 2; /* two base64'd length chars on the front */ + max += 5; /* @checksum/M */ + max += 3 * 2; /* maximum number of segment dividers (".../M") + * is ((514 + 251) / 252) = 3 + */ + max += 1; /* NUL on end */ + } + else { + /* calculate the maximum length of the cooked key */ + keylen = (keylen + 2) / 3; + + max = keylen * 4; + max += 5; /* @checksum/M */ + max += 3 * 2; /* maximum number of segment dividers (".../M") + * is ((514 + 188) / 189) = 3 + */ + max += 1; /* NUL on end */ + } + + _debug("max: %d", max); + + key = kmalloc(max, GFP_KERNEL); + if (!key) + return NULL; + + len = 0; + + /* build the cooked key */ + sprintf(key, "@%02x/+", (unsigned) csum); + len = 5; + mark = len - 1; + + if (print) { + acc = *(uint16_t *) raw; + raw += 2; + + key[len + 1] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len] = cachefiles_charmap[acc & 63]; + len += 2; + + seg = 250; + for (loop = keylen; loop > 0; loop--) { + if (seg <= 0) { + key[len++] = '/'; + mark = len; + key[len++] = '+'; + seg = 252; + } + + key[len++] = *raw++; + ASSERT(len < max); + } + + switch (type) { + case FSCACHE_COOKIE_TYPE_INDEX: type = 'I'; break; + case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'D'; break; + default: type = 'S'; break; + } + } + else { + seg = 252; + for (loop = keylen; loop > 0; loop--) { + if (seg <= 0) { + key[len++] = '/'; + mark = len; + key[len++] = '+'; + seg = 252; + } + + acc = *raw++; + acc |= *raw++ << 8; + acc |= *raw++ << 16; + + _debug("acc: %06x", acc); + + key[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len++] = cachefiles_charmap[acc & 63]; + + ASSERT(len < max); + } + + switch (type) { + case FSCACHE_COOKIE_TYPE_INDEX: type = 'J'; break; + case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'E'; break; + default: type = 'T'; break; + } + } + + key[mark] = type; + key[len] = 0; + + _leave(" = %p %d:[%s]", key, len, key); + return key; + +} /* end cachefiles_cook_key() */ diff --git a/fs/cachefiles/cf-main.c b/fs/cachefiles/cf-main.c new file mode 100644 index 0000000..660b544 --- /dev/null +++ b/fs/cachefiles/cf-main.c @@ -0,0 +1,131 @@ +/* cf-main.c: network filesystem caching backend to use cache files on a + * premounted filesystem + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +unsigned long cachefiles_debug; + +static int cachefiles_init(void); +static void cachefiles_exit(void); + +fs_initcall(cachefiles_init); +module_exit(cachefiles_exit); + +MODULE_DESCRIPTION("Mounted-filesystem based cache"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +kmem_cache_t *cachefiles_object_jar; + +static void cachefiles_object_init_once(void *_object, kmem_cache_t *cachep, + unsigned long flags) +{ + struct cachefiles_object *object = _object; + + switch (flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) { + case SLAB_CTOR_CONSTRUCTOR: + memset(object, 0, sizeof(*object)); + fscache_object_init(&object->fscache); + init_rwsem(&object->sem); + spin_lock_init(&object->work_lock); + INIT_LIST_HEAD(&object->read_list); + INIT_LIST_HEAD(&object->read_pend_list); + INIT_WORK(&object->read_work, &cachefiles_read_copier_work, + object); + INIT_LIST_HEAD(&object->write_list); + INIT_WORK(&object->write_work, &cachefiles_write_work, object); + break; + + default: + break; + } +} + +/*****************************************************************************/ +/* + * initialise the fs caching module + */ +static int __init cachefiles_init(void) +{ + struct proc_dir_entry *pde; + int ret; + + /* create a proc entry to use as a handle for the userspace daemon */ + ret = -ENOMEM; + + pde = create_proc_entry("cachefiles", 0600, proc_root_fs); + if (!pde) { + kerror("Unable to create /proc/fs/cachefiles"); + goto error_proc; + } + + if (cachefiles_sysctl_init() < 0) { + kerror("Unable to create sysctl parameters"); + goto error_object_jar; + } + + pde->owner = THIS_MODULE; + pde->proc_fops = &cachefiles_proc_fops; + cachefiles_proc = pde; + + /* create an object jar */ + cachefiles_object_jar = + kmem_cache_create("cachefiles_object_jar", + sizeof(struct cachefiles_object), + 0, + SLAB_HWCACHE_ALIGN, + cachefiles_object_init_once, + NULL); + if (!cachefiles_object_jar) { + printk(KERN_NOTICE + "CacheFiles: Failed to allocate an object jar\n"); + goto error_sysctl; + } + + printk(KERN_INFO "CacheFiles: Loaded\n"); + return 0; + +error_sysctl: + cachefiles_sysctl_cleanup(); +error_object_jar: + remove_proc_entry("cachefiles", proc_root_fs); +error_proc: + kerror("failed to register: %d", ret); + return ret; + +} /* end cachefiles_init() */ + +/*****************************************************************************/ +/* + * clean up on module removal + */ +static void __exit cachefiles_exit(void) +{ + printk(KERN_INFO "CacheFiles: Unloading\n"); + + kmem_cache_destroy(cachefiles_object_jar); + remove_proc_entry("cachefiles", proc_root_fs); + cachefiles_sysctl_cleanup(); + +} /* end cachefiles_exit() */ diff --git a/fs/cachefiles/cf-namei.c b/fs/cachefiles/cf-namei.c new file mode 100644 index 0000000..45e2359 --- /dev/null +++ b/fs/cachefiles/cf-namei.c @@ -0,0 +1,837 @@ +/* cf-namei.c: CacheFiles path walking and related routines + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +/*****************************************************************************/ +/* + * record the fact that an object is now active + */ +static void cachefiles_mark_object_active(struct cachefiles_cache *cache, + struct cachefiles_object *object) +{ + struct cachefiles_object *xobject; + struct rb_node **_p, *_parent = NULL; + struct dentry *dentry; + + write_lock(&cache->active_lock); + + dentry = object->dentry; + _p = &cache->active_nodes.rb_node; + while (*_p) { + _parent = *_p; + xobject = rb_entry(_parent, + struct cachefiles_object, active_node); + + if (xobject->dentry > dentry) + _p = &(*_p)->rb_left; + else if (xobject->dentry < dentry) + _p = &(*_p)->rb_right; + else + BUG(); /* uh oh... this dentry shouldn't be here */ + } + + rb_link_node(&object->active_node, _parent, _p); + rb_insert_color(&object->active_node, &cache->active_nodes); + + write_unlock(&cache->active_lock); + +} /* end cachefiles_mark_object_active() */ + +/*****************************************************************************/ +/* + * delete an object representation from the cache + * - file backed objects are unlinked + * - directory backed objects are stuffed into the graveyard for userspace to + * delete + * - unlocks the directory mutex + */ +static int cachefiles_bury_object(struct cachefiles_cache *cache, + struct dentry *dir, + struct dentry *rep) +{ + struct dentry *grave, *alt, *trap; + struct qstr name; + const char *old_name; + char nbuffer[8 + 8 + 1]; + int ret; + + _enter(",'%*.*s','%*.*s'", + dir->d_name.len, dir->d_name.len, dir->d_name.name, + rep->d_name.len, rep->d_name.len, rep->d_name.name); + + /* non-directories can just be unlinked */ + if (!S_ISDIR(rep->d_inode->i_mode)) { + _debug("unlink stale object"); + ret = dir->d_inode->i_op->unlink(dir->d_inode, rep); + + mutex_unlock(&dir->d_inode->i_mutex); + + if (ret == 0) { + _debug("d_delete"); + d_delete(rep); + } + else if (ret == -EIO) { + cachefiles_io_error(cache, "Unlink failed"); + } + + _leave(" = %d", ret); + return ret; + } + + /* directories have to be moved to the graveyard */ + _debug("move stale object to graveyard"); + mutex_unlock(&dir->d_inode->i_mutex); + +try_again: + /* first step is to make up a grave dentry in the graveyard */ + sprintf(nbuffer, "%08x%08x", + (uint32_t) xtime.tv_sec, + (uint32_t) atomic_inc_return(&cache->gravecounter)); + + name.name = nbuffer; + name.len = strlen(name.name); + + /* hash the name */ + name.hash = full_name_hash(name.name, name.len); + + if (dir->d_op && dir->d_op->d_hash) { + ret = dir->d_op->d_hash(dir, &name); + if (ret < 0) { + if (ret == -EIO) + cachefiles_io_error(cache, "Hash failed"); + + _leave(" = %d", ret); + return ret; + } + } + + /* do the multiway lock magic */ + trap = lock_rename(cache->graveyard, dir); + + /* do some checks before getting the grave dentry */ + if (rep->d_parent != dir) { + /* the entry was probably culled when we dropped the parent dir + * lock */ + unlock_rename(cache->graveyard, dir); + _leave(" = 0 [culled?]"); + return 0; + } + + if (!S_ISDIR(cache->graveyard->d_inode->i_mode)) { + 
unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "Graveyard no longer a directory"); + return -EIO; + } + + if (trap == rep) { + unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "May not make directory loop"); + return -EIO; + } + + if (d_mountpoint(rep)) { + unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "Mountpoint in cache"); + return -EIO; + } + + /* see if there's a dentry already there for this name */ + grave = d_lookup(cache->graveyard, &name); + if (!grave) { + _debug("not found"); + + grave = d_alloc(cache->graveyard, &name); + if (!grave) { + unlock_rename(cache->graveyard, dir); + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + alt = cache->graveyard->d_inode->i_op->lookup( + cache->graveyard->d_inode, grave, NULL); + if (IS_ERR(alt)) { + unlock_rename(cache->graveyard, dir); + dput(grave); + + if (PTR_ERR(alt) == -ENOMEM) { + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + cachefiles_io_error(cache, "Lookup error %ld", + PTR_ERR(alt)); + return -EIO; + } + + if (alt) { + dput(grave); + grave = alt; + } + } + + if (grave->d_inode) { + unlock_rename(cache->graveyard, dir); + dput(grave); + grave = NULL; + cond_resched(); + goto try_again; + } + + if (d_mountpoint(grave)) { + unlock_rename(cache->graveyard, dir); + dput(grave); + cachefiles_io_error(cache, "Mountpoint in graveyard"); + return -EIO; + } + + /* target should not be an ancestor of source */ + if (trap == grave) { + unlock_rename(cache->graveyard, dir); + dput(grave); + cachefiles_io_error(cache, "May not make directory loop"); + return -EIO; + } + + /* attempt the rename */ + DQUOT_INIT(dir->d_inode); + DQUOT_INIT(cache->graveyard->d_inode); + + old_name = fsnotify_oldname_init(rep->d_name.name); + + ret = dir->d_inode->i_op->rename(dir->d_inode, rep, + cache->graveyard->d_inode, grave); + + if (ret == 0) { + d_move(rep, grave); + fsnotify_move(dir->d_inode, cache->graveyard->d_inode, + old_name, rep->d_name.name, 1, + grave->d_inode, rep->d_inode); + } + else if (ret != -ENOMEM) { + cachefiles_io_error(cache, "Rename failed with error %d", ret); + } + + fsnotify_oldname_free(old_name); + + unlock_rename(cache->graveyard, dir); + dput(grave); + _leave(" = 0"); + return 0; + +} /* end cachefiles_bury_object() */ + +/*****************************************************************************/ +/* + * delete an object representation from the cache + */ +int cachefiles_delete_object(struct cachefiles_cache *cache, + struct cachefiles_object *object) +{ + struct dentry *dir; + int ret; + + _enter(",{%p}", object->dentry); + + ASSERT(object->dentry); + ASSERT(object->dentry->d_inode); + ASSERT(object->dentry->d_parent); + + dir = dget_parent(object->dentry); + + mutex_lock(&dir->d_inode->i_mutex); + ret = cachefiles_bury_object(cache, dir, object->dentry); + + dput(dir); + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_delete_object() */ + +/*****************************************************************************/ +/* + * walk from the parent object to the child object through the backing + * filesystem, creating directories as we go + */ +int cachefiles_walk_to_object(struct cachefiles_object *parent, + struct cachefiles_object *object, + char *key, + struct cachefiles_xattr *auxdata) +{ + struct cachefiles_cache *cache; + struct dentry *dir, *next = NULL, *new; + struct file *file; + struct qstr name; + uid_t fsuid; + gid_t fsgid; + int ret; + + _enter("{%p}", parent->dentry); + + cache = container_of(parent->fscache.cache, + struct 
cachefiles_cache, cache); + + ASSERT(parent->dentry); + ASSERT(parent->dentry->d_inode); + + if (!(S_ISDIR(parent->dentry->d_inode->i_mode))) { + // TODO: convert file to dir + _leave("looking up in none directory"); + return -ENOBUFS; + } + + fsuid = current->fsuid; + fsgid = current->fsgid; + current->fsuid = 0; + current->fsgid = 0; + + dir = dget(parent->dentry); + +advance: + /* attempt to transit the first directory component */ + name.name = key; + key = strchr(key, '/'); + if (key) { + name.len = key - (char *) name.name; + *key++ = 0; + } + else { + name.len = strlen(name.name); + } + + /* hash the name */ + name.hash = full_name_hash(name.name, name.len); + + if (dir->d_op && dir->d_op->d_hash) { + ret = dir->d_op->d_hash(dir, &name); + if (ret < 0) { + cachefiles_io_error(cache, "Hash failed"); + goto error_out2; + } + } + +lookup_again: + /* search the current directory for the element name */ + _debug("lookup '%s' %x", name.name, name.hash); + + mutex_lock(&dir->d_inode->i_mutex); + + next = d_lookup(dir, &name); + if (!next) { + _debug("not found"); + + new = d_alloc(dir, &name); + if (!new) + goto nomem_d_alloc; + + ASSERT(dir->d_inode->i_op); + ASSERT(dir->d_inode->i_op->lookup); + + next = dir->d_inode->i_op->lookup(dir->d_inode, new, NULL); + if (IS_ERR(next)) + goto lookup_error; + + if (!next) + next = new; + else + dput(new); + + if (next->d_inode) { + ret = -EPERM; + if (!next->d_inode->i_op || + !next->d_inode->i_op->setxattr || + !next->d_inode->i_op->getxattr || + !next->d_inode->i_op->removexattr) + goto error; + + if (key && (!next->d_inode->i_op->lookup || + !next->d_inode->i_op->mkdir || + !next->d_inode->i_op->create || + !next->d_inode->i_op->rename || + !next->d_inode->i_op->rmdir || + !next->d_inode->i_op->unlink)) + goto error; + } + } + + _debug("next -> %p %s", next, next->d_inode ? 
"positive" : "negative"); + + if (!key) + object->new = !next->d_inode; + + /* we need to create the object if it's negative */ + if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) { + /* index objects and intervening tree levels must be subdirs */ + if (!next->d_inode) { + DQUOT_INIT(dir->d_inode); + ret = dir->d_inode->i_op->mkdir(dir->d_inode, next, 0); + if (ret < 0) + goto create_error; + + ASSERT(next->d_inode); + + fsnotify_mkdir(dir->d_inode, next); + + _debug("mkdir -> %p{%p{ino=%lu}}", + next, next->d_inode, next->d_inode->i_ino); + } + /* we need to make sure a positive match found a directory */ + else if (!S_ISDIR(next->d_inode->i_mode)) { + kerror("inode %lu is not a directory", + next->d_inode->i_ino); + ret = -ENOBUFS; + goto error; + } + } + else { + /* non-index objects start out life as files */ + if (!next->d_inode) { + DQUOT_INIT(dir->d_inode); + ret = dir->d_inode->i_op->create(dir->d_inode, next, + S_IFREG, NULL); + if (ret < 0) + goto create_error; + + ASSERT(next->d_inode); + + fsnotify_create(dir->d_inode, next); + + _debug("create -> %p{%p{ino=%lu}}", + next, next->d_inode, next->d_inode->i_ino); + } + /* we need to make sure a positive match found a directory or a + * file */ + else if (!S_ISDIR(next->d_inode->i_mode) && + !S_ISREG(next->d_inode->i_mode) + ) { + kerror("inode %lu is not a file or directory", + next->d_inode->i_ino); + ret = -ENOBUFS; + goto error; + } + } + + /* process the next component */ + if (key) { + _debug("advance"); + mutex_unlock(&dir->d_inode->i_mutex); + dput(dir); + dir = next; + next = NULL; + goto advance; + } + + /* we've found the object we were looking for */ + object->dentry = next; + + /* if we've found that the terminal object exists, then we need to + * check its attributes and delete it if it's out of date */ + if (!object->new) { + _debug("validate '%*.*s'", + next->d_name.len, next->d_name.len, next->d_name.name); + + ret = cachefiles_check_object_xattr(object, auxdata); + if (ret == -ESTALE) { + /* delete the object (the deleter drops the directory + * mutex) */ + object->dentry = NULL; + + ret = cachefiles_bury_object(cache, dir, next); + dput(next); + next = NULL; + + if (ret < 0) + goto delete_error; + + _debug("redo lookup"); + goto lookup_again; + } + } + + /* note that we're now using this object */ + cachefiles_mark_object_active(cache, object); + + mutex_unlock(&dir->d_inode->i_mutex); + dput(dir); + dir = NULL; + + if (object->new) { + /* attach data to a newly constructed terminal object */ + ret = cachefiles_set_object_xattr(object, auxdata); + if (ret < 0) + goto check_error; + } + else { + /* always update the atime on an object we've just looked up + * (this is used to keep track of culling, and atimes are only + * updated by read, write and readdir but not lookup or + * open) */ + touch_atime(cache->mnt, next); + } + + /* open a file interface onto a data file */ + if (object->type != FSCACHE_COOKIE_TYPE_INDEX) { + if (S_ISREG(object->dentry->d_inode->i_mode)) { + file = dentry_open_kernel(dget(object->dentry), + mntget(cache->mnt), 0); + if (IS_ERR(file)) + goto open_error; + + object->backer = file; + } + else { + BUG(); // TODO: open file in data-class subdir + } + } + + current->fsuid = fsuid; + current->fsgid = fsgid; + object->new = 0; + + _leave(" = 0 [%lu]", object->dentry->d_inode->i_ino); + return 0; + +create_error: + if (ret == -EIO) + cachefiles_io_error(cache, "create/mkdir failed"); + goto error; + +open_error: + ret = PTR_ERR(file); +check_error: + write_lock(&cache->active_lock); + 
rb_erase(&object->active_node, &cache->active_nodes); + write_unlock(&cache->active_lock); + + dput(object->dentry); + object->dentry = NULL; + goto error_out; + +delete_error: + _debug("delete error %d", ret); + goto error_out2; + +lookup_error: + _debug("lookup error %ld", PTR_ERR(next)); + dput(new); + ret = PTR_ERR(next); + if (ret == -EIO) + cachefiles_io_error(cache, "Lookup failed"); + next = NULL; + goto error; + +nomem_d_alloc: + ret = -ENOMEM; +error: + mutex_unlock(&dir->d_inode->i_mutex); + dput(next); +error_out2: + dput(dir); +error_out: + current->fsuid = fsuid; + current->fsgid = fsgid; + + _leave(" = ret"); + return ret; + +} /* end cachefiles_walk_to_object() */ + +/*****************************************************************************/ +/* + * get a subdirectory + */ +struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, + struct dentry *dir, + const char *dirname) +{ + struct dentry *subdir, *new; + struct qstr name; + uid_t fsuid; + gid_t fsgid; + int ret; + + _enter(""); + + /* set up the name */ + name.name = dirname; + name.len = strlen(dirname); + name.hash = full_name_hash(name.name, name.len); + + if (dir->d_op && dir->d_op->d_hash) { + ret = dir->d_op->d_hash(dir, &name); + if (ret < 0) { + if (ret == -EIO) + kerror("Hash failed"); + _leave(" = %d", ret); + return ERR_PTR(ret); + } + } + + /* search the current directory for the element name */ + _debug("lookup '%s' %x", name.name, name.hash); + + fsuid = current->fsuid; + fsgid = current->fsgid; + current->fsuid = 0; + current->fsgid = 0; + + mutex_lock(&dir->d_inode->i_mutex); + + subdir = d_lookup(dir, &name); + if (!subdir) { + _debug("not found"); + + new = d_alloc(dir, &name); + if (!new) + goto nomem_d_alloc; + + subdir = dir->d_inode->i_op->lookup(dir->d_inode, new, NULL); + if (IS_ERR(subdir)) + goto lookup_error; + + if (!subdir) + subdir = new; + else + dput(new); + } + + _debug("subdir -> %p %s", + subdir, subdir->d_inode ? 
"positive" : "negative"); + + /* we need to create the subdir if it doesn't exist yet */ + if (!subdir->d_inode) { + DQUOT_INIT(dir->d_inode); + ret = dir->d_inode->i_op->mkdir(dir->d_inode, subdir, 0700); + if (ret < 0) + goto mkdir_error; + + ASSERT(subdir->d_inode); + + fsnotify_mkdir(dir->d_inode, subdir); + + _debug("mkdir -> %p{%p{ino=%lu}}", + subdir, + subdir->d_inode, + subdir->d_inode->i_ino); + } + + mutex_unlock(&dir->d_inode->i_mutex); + + current->fsuid = fsuid; + current->fsgid = fsgid; + + /* we need to make sure the subdir is a directory */ + ASSERT(subdir->d_inode); + + if (!S_ISDIR(subdir->d_inode->i_mode)) { + kerror("%s is not a directory", dirname); + ret = -EIO; + goto check_error; + } + + ret = -EPERM; + if (!subdir->d_inode->i_op || + !subdir->d_inode->i_op->setxattr || + !subdir->d_inode->i_op->getxattr || + !subdir->d_inode->i_op->lookup || + !subdir->d_inode->i_op->mkdir || + !subdir->d_inode->i_op->create || + !subdir->d_inode->i_op->rename || + !subdir->d_inode->i_op->rmdir || + !subdir->d_inode->i_op->unlink) + goto check_error; + + _leave(" = [%lu]", subdir->d_inode->i_ino); + return subdir; + +check_error: + dput(subdir); + _leave(" = %d [check]", ret); + return ERR_PTR(ret); + +mkdir_error: + mutex_unlock(&dir->d_inode->i_mutex); + kerror("mkdir %s failed with error %d", dirname, ret); + goto error_out; + +lookup_error: + mutex_unlock(&dir->d_inode->i_mutex); + dput(new); + ret = PTR_ERR(subdir); + kerror("Lookup %s failed with error %d", dirname, ret); + goto error_out; + +nomem_d_alloc: + mutex_unlock(&dir->d_inode->i_mutex); + ret = -ENOMEM; + goto error_out; + +error_out: + current->fsuid = fsuid; + current->fsgid = fsgid; + _leave(" = %d", ret); + return ERR_PTR(ret); + +} /* end cachefiles_get_directory() */ + +/*****************************************************************************/ +/* + * cull an object if it's not in use + * - called only by cache manager daemon + */ +int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, + char *filename) +{ + struct cachefiles_object *object; + struct rb_node *_n; + struct dentry *victim, *new; + struct qstr name; + int ret; + + _enter(",%*.*s/,%s", + dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); + + /* set up the name */ + name.name = filename; + name.len = strlen(filename); + name.hash = full_name_hash(name.name, name.len); + + if (dir->d_op && dir->d_op->d_hash) { + ret = dir->d_op->d_hash(dir, &name); + if (ret < 0) { + if (ret == -EIO) + cachefiles_io_error(cache, "Hash failed"); + _leave(" = %d", ret); + return ret; + } + } + + /* look up the victim */ + mutex_lock(&dir->d_inode->i_mutex); + + victim = d_lookup(dir, &name); + if (!victim) { + _debug("not found"); + + new = d_alloc(dir, &name); + if (!new) + goto nomem_d_alloc; + + victim = dir->d_inode->i_op->lookup(dir->d_inode, new, NULL); + if (IS_ERR(victim)) + goto lookup_error; + + if (!victim) + victim = new; + else + dput(new); + } + + _debug("victim -> %p %s", victim, victim->d_inode ? 
"positive" : "negative"); + + /* if the object is no longer there then we probably retired the object + * at the netfs's request whilst the cull was in progress + */ + if (!victim->d_inode) { + mutex_unlock(&dir->d_inode->i_mutex); + dput(victim); + _leave(" = -ENOENT [absent]"); + return -ENOENT; + } + + /* check to see if we're using this object */ + read_lock(&cache->active_lock); + + _n = cache->active_nodes.rb_node; + + while (_n) { + object = rb_entry(_n, struct cachefiles_object, active_node); + + if (object->dentry > victim) + _n = _n->rb_left; + else if (object->dentry < victim) + _n = _n->rb_right; + else + goto object_in_use; + } + + read_unlock(&cache->active_lock); + + /* okay... the victim is not being used so we can cull it + * - start by marking it as stale + */ + _debug("victim is cullable"); + + ret = cachefiles_remove_object_xattr(cache, victim); + if (ret < 0) + goto error_unlock; + + /* actually remove the victim (drops the dir mutex) */ + _debug("bury"); + + ret = cachefiles_bury_object(cache, dir, victim); + if (ret < 0) + goto error; + + dput(victim); + _leave(" = 0"); + return 0; + + +object_in_use: + read_unlock(&cache->active_lock); + mutex_unlock(&dir->d_inode->i_mutex); + dput(victim); + _leave(" = -EBUSY [in use]"); + return -EBUSY; + +nomem_d_alloc: + mutex_unlock(&dir->d_inode->i_mutex); + _leave(" = -ENOMEM"); + return -ENOMEM; + +lookup_error: + mutex_unlock(&dir->d_inode->i_mutex); + dput(new); + ret = PTR_ERR(victim); + if (ret == -EIO) + cachefiles_io_error(cache, "Lookup failed"); + goto choose_error; + +error_unlock: + mutex_unlock(&dir->d_inode->i_mutex); +error: + dput(victim); +choose_error: + if (ret == -ENOENT) { + /* file or dir now absent - probably retired by netfs */ + _leave(" = -ESTALE [absent]"); + return -ESTALE; + } + + if (ret != -ENOMEM) { + kerror("Internal error: %d", ret); + ret = -EIO; + } + + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_cull() */ diff --git a/fs/cachefiles/cf-proc.c b/fs/cachefiles/cf-proc.c new file mode 100644 index 0000000..fce1385 --- /dev/null +++ b/fs/cachefiles/cf-proc.c @@ -0,0 +1,510 @@ +/* cf-proc.c: /proc/fs/cachefiles interface + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +static int cachefiles_proc_open(struct inode *, struct file *); +static int cachefiles_proc_release(struct inode *, struct file *); +static ssize_t cachefiles_proc_read(struct file *, char __user *, size_t, loff_t *); +static ssize_t cachefiles_proc_write(struct file *, const char __user *, size_t, loff_t *); +static int cachefiles_proc_brun(struct cachefiles_cache *cache, char *args); +static int cachefiles_proc_bcull(struct cachefiles_cache *cache, char *args); +static int cachefiles_proc_bstop(struct cachefiles_cache *cache, char *args); +static int cachefiles_proc_cull(struct cachefiles_cache *cache, char *args); +static int cachefiles_proc_debug(struct cachefiles_cache *cache, char *args); +static int cachefiles_proc_dir(struct cachefiles_cache *cache, char *args); +static int cachefiles_proc_tag(struct cachefiles_cache *cache, char *args); + +struct proc_dir_entry *cachefiles_proc; + +static unsigned long cachefiles_open; + +struct file_operations cachefiles_proc_fops = { + .open = cachefiles_proc_open, + .release = cachefiles_proc_release, + .read = cachefiles_proc_read, + .write = cachefiles_proc_write, +}; + +struct cachefiles_proc_cmd { + char name[8]; + int (*handler)(struct cachefiles_cache *cache, char *args); +}; + +static const struct cachefiles_proc_cmd cachefiles_proc_cmds[] = { + { "bind", cachefiles_proc_bind }, + { "brun", cachefiles_proc_brun }, + { "bcull", cachefiles_proc_bcull }, + { "bstop", cachefiles_proc_bstop }, + { "cull", cachefiles_proc_cull }, + { "debug", cachefiles_proc_debug }, + { "dir", cachefiles_proc_dir }, + { "tag", cachefiles_proc_tag }, + { "", NULL } +}; + + +/*****************************************************************************/ +/* + * do various checks + */ +static int cachefiles_proc_open(struct inode *inode, struct file *file) +{ + struct cachefiles_cache *cache; + + _enter(""); + + /* only the superuser may do this */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* /proc/fs/cachefiles may only be open once at a time */ + if (xchg(&cachefiles_open, 1) == 1) + return -EBUSY; + + /* allocate a cache record */ + cache = kzalloc(sizeof(struct cachefiles_cache), GFP_KERNEL); + if (!cache) { + cachefiles_open = 0; + return -ENOMEM; + } + + cache->active_nodes = RB_ROOT; + rwlock_init(&cache->active_lock); + + /* set default caching limits + * - limit at 1% free space + * - cull below 5% free space + * - cease culling above 7% free space + */ + cache->brun_percent = 7; + cache->bcull_percent = 5; + cache->bstop_percent = 1; + + file->private_data = cache; + cache->cachefilesd = file; + return 0; + +} /* end cachefiles_proc_open() */ + +/*****************************************************************************/ +/* + * release a cache + */ +static int cachefiles_proc_release(struct inode *inode, struct file *file) +{ + struct cachefiles_cache *cache = file->private_data; + + _enter(""); + + ASSERT(cache); + + set_bit(CACHEFILES_DEAD, &cache->flags); + + cachefiles_proc_unbind(cache); + + ASSERT(!cache->active_nodes.rb_node); + + /* clean up the control file interface */ + cache->cachefilesd = NULL; + file->private_data = NULL; + cachefiles_open = 0; + + kfree(cache); + + _leave(""); + return 0; + +} /* end cachefiles_proc_release() */ + +/*****************************************************************************/ +/* + * read the cache state + */ +static ssize_t 
cachefiles_proc_read(struct file *file, char __user *_buffer,
+ size_t buflen, loff_t *pos)
+{
+ struct cachefiles_cache *cache = file->private_data;
+ char buffer[256];
+ int n;
+
+ _enter(",,%zu,", buflen);
+
+ if (!test_bit(CACHEFILES_READY, &cache->flags))
+ return 0;
+
+ /* check how much space the cache has */
+ cachefiles_has_space(cache, 0);
+
+ /* summarise */
+ n = snprintf(buffer, sizeof(buffer),
+ "cull=%c"
+ " brun=%llx"
+ " bcull=%llx"
+ " bstop=%llx",
+ test_bit(CACHEFILES_CULLING, &cache->flags) ? '1' : '0',
+ cache->brun,
+ cache->bcull,
+ cache->bstop
+ );
+
+ if (n > buflen)
+ return -EMSGSIZE;
+
+ if (copy_to_user(_buffer, buffer, n) != 0)
+ return -EFAULT;
+
+ return n;
+
+} /* end cachefiles_proc_read() */
+
+/*****************************************************************************/
+/*
+ * command the cache
+ */
+static ssize_t cachefiles_proc_write(struct file *file,
+ const char __user *_data, size_t datalen,
+ loff_t *pos)
+{
+ const struct cachefiles_proc_cmd *cmd;
+ struct cachefiles_cache *cache = file->private_data;
+ ssize_t ret;
+ char *data, *args, *cp;
+
+ _enter(",,%zu,", datalen);
+
+ ASSERT(cache);
+
+ if (test_bit(CACHEFILES_DEAD, &cache->flags))
+ return -EIO;
+
+ if (datalen < 0 || datalen > PAGE_SIZE - 1)
+ return -EOPNOTSUPP;
+
+ /* drag the command string into the kernel so we can parse it */
+ data = kmalloc(datalen + 1, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ ret = -EFAULT;
+ if (copy_from_user(data, _data, datalen) != 0)
+ goto error;
+
+ data[datalen] = '\0';
+
+ ret = -EINVAL;
+ if (memchr(data, '\0', datalen))
+ goto error;
+
+ /* strip any newline */
+ cp = memchr(data, '\n', datalen);
+ if (cp) {
+ if (cp == data)
+ goto error;
+
+ *cp = '\0';
+ }
+
+ /* parse the command */
+ ret = -EOPNOTSUPP;
+
+ for (args = data; *args; args++)
+ if (isspace(*args))
+ break;
+ if (*args) {
+ if (args == data)
+ goto error;
+ *args = '\0';
+ for (args++; isspace(*args); args++)
+ continue;
+ }
+
+ /* run the appropriate command handler */
+ for (cmd = cachefiles_proc_cmds; cmd->name[0]; cmd++)
+ if (strcmp(cmd->name, data) == 0)
+ goto found_command;
+
+error:
+ kfree(data);
+ _leave(" = %d", ret);
+ return ret;
+
+found_command:
+ mutex_lock(&file->f_dentry->d_inode->i_mutex);
+
+ ret = -EIO;
+ if (!test_bit(CACHEFILES_DEAD, &cache->flags))
+ ret = cmd->handler(cache, args);
+
+ mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+
+ if (ret == 0)
+ ret = datalen;
+ goto error;
+
+} /* end cachefiles_proc_write() */
+
+/*****************************************************************************/
+/*
+ * give a range error for cache space constraints
+ * - can be tail-called
+ */
+static int cachefiles_proc_range_error(struct cachefiles_cache *cache, char *args)
+{
+ kerror("Free space limits must be in range"
+ " 0%%<=bstop<bcull<brun<100%%");
+
+ return -EINVAL;
+
+} /* end cachefiles_proc_range_error() */
+
+/*****************************************************************************/
+/*
+ * set the percentage of blocks at which to cease culling
+ * - command: "brun <N>%"
+ */
+static int cachefiles_proc_brun(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long brun;
+
+ _enter(",%s", args);
+
+ if (!*args)
+ return -EINVAL;
+
+ brun = simple_strtoul(args, &args, 10);
+ if (args[0] != '%' || args[1] != '\0')
+ return -EINVAL;
+
+ if (brun <= cache->bcull_percent || brun >= 100)
+ return cachefiles_proc_range_error(cache, args);
+
+ cache->brun_percent = brun;
+ return 0;
+
+} /* end cachefiles_proc_brun() */
+
+/*****************************************************************************/
+/*
+ * set the percentage of blocks at which to start culling
+ * - command: "bcull <N>%"
+ */
+static int cachefiles_proc_bcull(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long 
bcull; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + bcull = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (bcull <= cache->bstop_percent || bcull >= cache->brun_percent) + return cachefiles_proc_range_error(cache, args); + + cache->bcull_percent = bcull; + return 0; + +} /* end cachefiles_proc_bcull() */ + +/*****************************************************************************/ +/* + * set the percentage of blocks at which to stop allocating + * - command: "bstop %" + */ +static int cachefiles_proc_bstop(struct cachefiles_cache *cache, char *args) +{ + unsigned long bstop; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + bstop = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (bstop < 0 || bstop >= cache->bcull_percent) + return cachefiles_proc_range_error(cache, args); + + cache->bstop_percent = bstop; + return 0; + +} /* end cachefiles_proc_bstop() */ + +/*****************************************************************************/ +/* + * set the cache directory + * - command: "dir " + */ +static int cachefiles_proc_dir(struct cachefiles_cache *cache, char *args) +{ + char *dir; + + _enter(",%s", args); + + if (!*args) { + kerror("Empty directory specified"); + return -EINVAL; + } + + if (cache->rootdirname) { + kerror("Second cache directory specified"); + return -EEXIST; + } + + dir = kstrdup(args, GFP_KERNEL); + if (!dir) + return -ENOMEM; + + cache->rootdirname = dir; + return 0; + +} /* end cachefiles_proc_dir() */ + +/*****************************************************************************/ +/* + * set the cache tag + * - command: "tag " + */ +static int cachefiles_proc_tag(struct cachefiles_cache *cache, char *args) +{ + char *tag; + + _enter(",%s", args); + + if (!*args) { + kerror("Empty tag specified"); + return -EINVAL; + } + + if (cache->tag) + return -EEXIST; + + tag = kstrdup(args, GFP_KERNEL); + if (!tag) + return -ENOMEM; + + cache->tag = tag; + return 0; + +} /* end cachefiles_proc_tag() */ + +/*****************************************************************************/ +/* + * request a node in the cache be culled + * - command: "cull " + */ +static int cachefiles_proc_cull(struct cachefiles_cache *cache, char *args) +{ + struct dentry *dir; + struct file *dirfile; + int dirfd, fput_needed, ret; + + _enter(",%s", args); + + dirfd = simple_strtoul(args, &args, 0); + + if (!args || !isspace(*args)) + goto inval; + + while (isspace(*args)) + args++; + + if (!*args) + goto inval; + + if (strchr(args, '/')) + goto inval; + + if (!test_bit(CACHEFILES_READY, &cache->flags)) { + kerror("cull applied to unready cache"); + return -EIO; + } + + if (test_bit(CACHEFILES_DEAD, &cache->flags)) { + kerror("cull applied to dead cache"); + return -EIO; + } + + /* extract the directory dentry from the fd */ + dirfile = fget_light(dirfd, &fput_needed); + if (!dirfile) { + kerror("cull dirfd not open"); + return -EBADF; + } + + dir = dget(dirfile->f_dentry); + fput_light(dirfile, fput_needed); + dirfile = NULL; + + if (!S_ISDIR(dir->d_inode->i_mode)) + goto notdir; + + ret = cachefiles_cull(cache, dir, args); + + dput(dir); + _leave(" = %d", ret); + return ret; + +notdir: + dput(dir); + kerror("cull command requires dirfd to be a directory"); + return -ENOTDIR; + +inval: + kerror("cull command requires dirfd and filename"); + return -EINVAL; + +} /* end cachefiles_proc_cull() */ + 
+/*****************************************************************************/ +/* + * set debugging mode + * - command: "debug " + */ +static int cachefiles_proc_debug(struct cachefiles_cache *cache, char *args) +{ + unsigned long mask; + + _enter(",%s", args); + + mask = simple_strtoul(args, &args, 0); + + if (!args || !isspace(*args)) + goto inval; + + cachefiles_debug = mask; + _leave(" = 0"); + return 0; + +inval: + kerror("debug command requires mask"); + return -EINVAL; + +} /* end cachefiles_proc_debug() */ diff --git a/fs/cachefiles/cf-sysctl.c b/fs/cachefiles/cf-sysctl.c new file mode 100644 index 0000000..4d6fc04 --- /dev/null +++ b/fs/cachefiles/cf-sysctl.c @@ -0,0 +1,69 @@ +/* cf-sysctl.c: Control parameters + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include "internal.h" + +static struct ctl_table_header *cachefiles_sysctl; + +/* + * Something that isn't CTL_ANY, CTL_NONE or a value that may clash. + * Use the same values as fs/nfs/sysctl.c + */ +#define CTL_UNNUMBERED -2 + +static ctl_table cachefiles_sysctl_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "debug", + .data = &cachefiles_debug, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax + }, + { .ctl_name = 0 } +}; + +static ctl_table cachefiles_sysctl_dir[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "cachefiles", + .maxlen = 0, + .mode = 0555, + .child = cachefiles_sysctl_table + }, + { .ctl_name = 0 } +}; + +static ctl_table cachefiles_sysctl_root[] = { + { + .ctl_name = CTL_FS, + .procname = "fs", + .mode = 0555, + .child = cachefiles_sysctl_dir, + }, + { .ctl_name = 0 } +}; + +int __init cachefiles_sysctl_init(void) +{ + cachefiles_sysctl = register_sysctl_table(cachefiles_sysctl_root, 0); + if (!cachefiles_sysctl) + return -ENOMEM; + return 0; +} + +void __exit cachefiles_sysctl_cleanup(void) +{ + unregister_sysctl_table(cachefiles_sysctl); +} diff --git a/fs/cachefiles/cf-xattr.c b/fs/cachefiles/cf-xattr.c new file mode 100644 index 0000000..a811667 --- /dev/null +++ b/fs/cachefiles/cf-xattr.c @@ -0,0 +1,299 @@ +/* cf-xattr.c: CacheFiles extended attribute management + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +static const char cachefiles_xattr_cache[] = XATTR_USER_PREFIX "CacheFiles.cache"; + +/*****************************************************************************/ +/* + * check the type label on an object + * - done using xattrs + */ +int cachefiles_check_object_type(struct cachefiles_object *object) +{ + struct dentry *dentry = object->dentry; + char type[3], xtype[3]; + int ret; + + ASSERT(dentry); + ASSERT(dentry->d_inode); + ASSERT(dentry->d_inode->i_op); + ASSERT(dentry->d_inode->i_op->setxattr); + ASSERT(dentry->d_inode->i_op->getxattr); + + if (!object->fscache.cookie) + strcpy(type, "C3"); + else + snprintf(type, 3, "%02x", object->fscache.cookie->def->type); + + _enter("%p{%s}", object, type); + + mutex_lock(&dentry->d_inode->i_mutex); + + /* attempt to install a type label directly */ + ret = dentry->d_inode->i_op->setxattr(dentry, cachefiles_xattr_cache, + type, 2, XATTR_CREATE); + if (ret == 0) { + _debug("SET"); + fsnotify_xattr(dentry); + mutex_unlock(&dentry->d_inode->i_mutex); + goto error; + } + + if (ret != -EEXIST) { + kerror("Can't set xattr on %*.*s [%lu] (err %d)", + dentry->d_name.len, dentry->d_name.len, + dentry->d_name.name, dentry->d_inode->i_ino, + -ret); + goto error; + } + + /* read the current type label */ + ret = dentry->d_inode->i_op->getxattr(dentry, cachefiles_xattr_cache, + xtype, 3); + if (ret < 0) { + if (ret == -ERANGE) + goto bad_type_length; + + kerror("Can't read xattr on %*.*s [%lu] (err %d)", + dentry->d_name.len, dentry->d_name.len, + dentry->d_name.name, dentry->d_inode->i_ino, + -ret); + goto error; + } + + /* check the type is what we're expecting */ + if (ret != 2) + goto bad_type_length; + + if (xtype[0] != type[0] || xtype[1] != type[1]) + goto bad_type; + + ret = 0; + +error: + mutex_unlock(&dentry->d_inode->i_mutex); + _leave(" = %d", ret); + return ret; + +bad_type_length: + kerror("Cache object %lu type xattr length incorrect", + dentry->d_inode->i_ino); + ret = -EIO; + goto error; + +bad_type: + xtype[2] = 0; + kerror("Cache object %*.*s [%lu] type %s not %s", + dentry->d_name.len, dentry->d_name.len, + dentry->d_name.name, dentry->d_inode->i_ino, + xtype, type); + ret = -EIO; + goto error; + +} /* end cachefiles_check_object_type() */ + +/*****************************************************************************/ +/* + * set the state xattr on a cache file + */ +int cachefiles_set_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata) +{ + struct dentry *dentry = object->dentry; + int ret; + + ASSERT(object->fscache.cookie); + ASSERT(dentry); + ASSERT(dentry->d_inode->i_op->setxattr); + + _enter("%p,#%d", object, auxdata->len); + + /* attempt to install the cache metadata directly */ + mutex_lock(&dentry->d_inode->i_mutex); + + _debug("SET %s #%u", + object->fscache.cookie->def->name, auxdata->len); + + ret = dentry->d_inode->i_op->setxattr(dentry, cachefiles_xattr_cache, + &auxdata->type, auxdata->len, + XATTR_CREATE); + if (ret == 0) + fsnotify_xattr(dentry); + else if (ret != -ENOMEM) + cachefiles_io_error_obj(object, + "Failed to set xattr with error %d", + ret); + + mutex_unlock(&dentry->d_inode->i_mutex); + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_set_object_xattr() */ + +/*****************************************************************************/ +/* + * check the state xattr on a cache file + * - return -ESTALE if the object should be deleted + */ +int 
cachefiles_check_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata) +{ + struct cachefiles_xattr *auxbuf; + struct dentry *dentry = object->dentry; + int ret; + + _enter("%p,#%d", object, auxdata->len); + + ASSERT(dentry); + ASSERT(dentry->d_inode); + ASSERT(dentry->d_inode->i_op->setxattr); + ASSERT(dentry->d_inode->i_op->getxattr); + + auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL); + if (!auxbuf) { + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + mutex_lock(&dentry->d_inode->i_mutex); + + /* read the current type label */ + ret = dentry->d_inode->i_op->getxattr(dentry, cachefiles_xattr_cache, + &auxbuf->type, 512 + 1); + if (ret < 0) { + if (ret == -ENODATA) + goto stale; /* no attribute - power went off + * mid-cull? */ + + if (ret == -ERANGE) + goto bad_type_length; + + cachefiles_io_error_obj(object, + "can't read xattr on %lu (err %d)", + dentry->d_inode->i_ino, -ret); + goto error; + } + + /* check the on-disk object */ + if (ret < 1) + goto bad_type_length; + + if (auxbuf->type != auxdata->type) + goto stale; + + auxbuf->len = ret; + + /* consult the netfs */ + if (object->fscache.cookie->def->check_aux) { + fscache_checkaux_t result; + unsigned int dlen; + + dlen = auxbuf->len - 1; + + _debug("checkaux %s #%u", + object->fscache.cookie->def->name, dlen); + + result = object->fscache.cookie->def->check_aux( + object->fscache.cookie->netfs_data, + &auxbuf->data, dlen); + + switch (result) { + /* entry okay as is */ + case FSCACHE_CHECKAUX_OKAY: + goto okay; + + /* entry requires update */ + case FSCACHE_CHECKAUX_NEEDS_UPDATE: + break; + + /* entry requires deletion */ + case FSCACHE_CHECKAUX_OBSOLETE: + goto stale; + + default: + BUG(); + } + + /* update the current label */ + ret = dentry->d_inode->i_op->setxattr(dentry, + cachefiles_xattr_cache, + &auxdata->type, + auxdata->len, + XATTR_REPLACE); + if (ret < 0) { + cachefiles_io_error_obj(object, + "Can't update xattr on %lu" + " (error %d)", + dentry->d_inode->i_ino, -ret); + goto error; + } + } + +okay: + ret = 0; + +error: + mutex_unlock(&dentry->d_inode->i_mutex); + kfree(auxbuf); + _leave(" = %d", ret); + return ret; + +bad_type_length: + kerror("Cache object %lu xattr length incorrect", + dentry->d_inode->i_ino); + ret = -EIO; + goto error; + +stale: + ret = -ESTALE; + goto error; + +} /* end cachefiles_check_object_xattr() */ + +/*****************************************************************************/ +/* + * remove the object's xattr to mark it stale + */ +int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, + struct dentry *dentry) +{ + int ret; + + mutex_lock(&dentry->d_inode->i_mutex); + + ret = dentry->d_inode->i_op->removexattr(dentry, + cachefiles_xattr_cache); + + mutex_unlock(&dentry->d_inode->i_mutex); + + if (ret < 0) { + if (ret == -ENOENT || ret == -ENODATA) + ret = 0; + else if (ret != -ENOMEM) + cachefiles_io_error(cache, + "Can't remove xattr from %lu" + " (error %d)", + dentry->d_inode->i_ino, -ret); + } + + _leave(" = %d", ret); + return ret; + +} /* end cachefiles_remove_object_xattr() */ diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h new file mode 100644 index 0000000..9b4b76c --- /dev/null +++ b/fs/cachefiles/internal.h @@ -0,0 +1,308 @@ +/* internal.h: general netfs cache on cache files internal defs + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * + * CacheFiles layout: + * + * /..../CacheDir/ + * index + * 0/ + * 1/ + * 2/ + * index + * 0/ + * 1/ + * 2/ + * index + * 0 + * 1 + * 2 + */ + +#include +#include +#include +#include + +struct cachefiles_cache; +struct cachefiles_object; + +extern unsigned long cachefiles_debug; +#define CACHEFILES_DEBUG_KENTER 1 +#define CACHEFILES_DEBUG_KLEAVE 2 +#define CACHEFILES_DEBUG_KDEBUG 4 + +extern struct fscache_cache_ops cachefiles_cache_ops; +extern struct proc_dir_entry *cachefiles_proc; +extern struct file_operations cachefiles_proc_fops; + +/*****************************************************************************/ +/* + * node records + */ +struct cachefiles_object { + struct fscache_object fscache; /* fscache handle */ + struct dentry *dentry; /* the file/dir representing this object */ + struct file *backer; /* backing file */ + loff_t i_size; /* object size */ + atomic_t usage; /* basic object usage count */ + atomic_t fscache_usage; /* FSDEF object usage count */ + uint8_t type; /* object type */ + uint8_t new; /* T if object new */ + spinlock_t work_lock; + struct rw_semaphore sem; + struct work_struct read_work; /* read page copier */ + struct list_head read_list; /* pages to copy */ + struct list_head read_pend_list; /* pages to pending read from backer */ + struct work_struct write_work; /* page writer */ + struct list_head write_list; /* pages to store */ + struct rb_node active_node; /* link in active tree (dentry is key) */ +}; + +extern kmem_cache_t *cachefiles_object_jar; + +/*****************************************************************************/ +/* + * Cache files cache definition + */ +struct cachefiles_cache { + struct fscache_cache cache; /* FS-Cache record */ + struct vfsmount *mnt; /* mountpoint holding the cache */ + struct dentry *graveyard; /* directory into which dead objects go */ + struct file *cachefilesd; /* manager daemon handle */ + struct rb_root active_nodes; /* active nodes (can't be culled) */ + rwlock_t active_lock; /* lock for active_nodes */ + atomic_t gravecounter; /* graveyard uniquifier */ + unsigned brun_percent; /* when to stop culling (%) */ + unsigned bcull_percent; /* when to start culling (%) */ + unsigned bstop_percent; /* when to stop allocating (%) */ + unsigned bsize; /* cache's block size */ + unsigned bshift; /* min(log2 (PAGE_SIZE / bsize), 0) */ + sector_t brun; /* when to stop culling */ + sector_t bcull; /* when to start culling */ + sector_t bstop; /* when to stop allocating */ + unsigned long flags; +#define CACHEFILES_READY 0 /* T if cache prepared */ +#define CACHEFILES_DEAD 1 /* T if cache dead */ +#define CACHEFILES_CULLING 2 /* T if cull engaged */ + char *rootdirname; /* name of cache root directory */ + char *tag; /* cache binding tag */ +}; + +/*****************************************************************************/ +/* + * backing file read tracking + */ +struct cachefiles_one_read { + wait_queue_t monitor; /* link into monitored waitqueue */ + struct page *back_page; /* backing file page we're waiting for */ + struct page *netfs_page; /* netfs page we're going to fill */ + struct cachefiles_object *object; + struct list_head obj_link; /* link in object's lists */ + fscache_rw_complete_t 
end_io_func; + void *context; +}; + +/*****************************************************************************/ +/* + * backing file write tracking + */ +struct cachefiles_one_write { + struct page *netfs_page; /* netfs page to copy */ + struct cachefiles_object *object; + struct list_head obj_link; /* link in object's lists */ + fscache_rw_complete_t end_io_func; + void *context; +}; + +/*****************************************************************************/ +/* + * auxiliary data xattr buffer + */ +struct cachefiles_xattr { + uint16_t len; + uint8_t type; + uint8_t data[]; +}; + + +/* cf-bind.c */ +extern int cachefiles_proc_bind(struct cachefiles_cache *cache, char *args); +extern void cachefiles_proc_unbind(struct cachefiles_cache *cache); + +/* cf-interface.c */ +extern void cachefiles_read_copier_work(void *_object); +extern void cachefiles_write_work(void *_object); +extern int cachefiles_has_space(struct cachefiles_cache *cache, unsigned nr); + +/* cf-key.c */ +extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type); + +/* cf-namei.c */ +extern int cachefiles_delete_object(struct cachefiles_cache *cache, + struct cachefiles_object *object); +extern int cachefiles_walk_to_object(struct cachefiles_object *parent, + struct cachefiles_object *object, + char *key, + struct cachefiles_xattr *auxdata); +extern struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, + struct dentry *dir, + const char *name); + +extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, + char *filename); + +/* cf-sysctl.c */ +extern int __init cachefiles_sysctl_init(void); +extern void __exit cachefiles_sysctl_cleanup(void); + +/* cf-xattr.c */ +extern int cachefiles_check_object_type(struct cachefiles_object *object); +extern int cachefiles_set_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata); +extern int cachefiles_check_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata); +extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, + struct dentry *dentry); + + +/*****************************************************************************/ +/* + * error handling + */ +#define kerror(FMT,...) printk(KERN_ERR "CacheFiles: "FMT"\n" ,##__VA_ARGS__); + +#define cachefiles_io_error(___cache, FMT, ...) \ +do { \ + kerror("I/O Error: " FMT ,##__VA_ARGS__); \ + fscache_io_error(&(___cache)->cache); \ + set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ +} while(0) + +#define cachefiles_io_error_obj(object, FMT, ...) \ +do { \ + struct cachefiles_cache *___cache; \ + \ + ___cache = container_of((object)->fscache.cache, \ + struct cachefiles_cache, cache); \ + cachefiles_io_error(___cache, FMT ,##__VA_ARGS__); \ +} while(0) + + +/*****************************************************************************/ +/* + * debug tracing + */ +#define dbgprintk(FMT,...) \ + printk("[%-6.6s] "FMT"\n",current->comm ,##__VA_ARGS__) + +/* make sure we maintain the format strings, even when debugging is disabled */ +static inline void _dbprintk(const char *fmt, ...) + __attribute__((format(printf,1,2))); +static inline void _dbprintk(const char *fmt, ...) +{ +} + +#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) +#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) +#define kdebug(FMT,...) dbgprintk(FMT ,##__VA_ARGS__) + + +#if defined(__KDEBUG) +#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__) +#define _leave(FMT,...) 
kleave(FMT,##__VA_ARGS__) +#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__) + +#elif defined(CONFIG_CACHEFILES_DEBUG) +#define _enter(FMT,...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KENTER) \ + kenter(FMT,##__VA_ARGS__); \ +} while (0) + +#define _leave(FMT,...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KLEAVE) \ + kleave(FMT,##__VA_ARGS__); \ +} while (0) + +#define _debug(FMT,...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KDEBUG) \ + kdebug(FMT,##__VA_ARGS__); \ +} while (0) + +#else +#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) +#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) +#define _debug(FMT,...) _dbprintk(FMT ,##__VA_ARGS__) +#endif + +#if 1 // defined(__KDEBUGALL) + +#define ASSERT(X) \ +do { \ + if (unlikely(!(X))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + BUG(); \ + } \ +} while(0) + +#define ASSERTCMP(X, OP, Y) \ +do { \ + if (unlikely(!((X) OP (Y)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while(0) + +#define ASSERTIF(C, X) \ +do { \ + if (unlikely((C) && !(X))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + BUG(); \ + } \ +} while(0) + +#define ASSERTIFCMP(C, X, OP, Y) \ +do { \ + if (unlikely((C) && !((X) OP (Y)))) { \ + printk(KERN_ERR "\n"); \ + printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while(0) + +#else + +#define ASSERT(X) \ +do { \ +} while(0) + +#define ASSERTCMP(X, OP, Y) \ +do { \ +} while(0) + +#define ASSERTIF(C, X) \ +do { \ +} while(0) + +#define ASSERTIFCMP(C, X, OP, Y) \ +do { \ +} while(0) + +#endif diff --git a/fs/dcache.c b/fs/dcache.c index 1b4a3a3..780f014 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -547,6 +547,139 @@ repeat: } /* + * destroy a single subtree of dentries for unmount + * - see the comments on shrink_dcache_for_umount() for a description of the + * locking + */ +static void shrink_dcache_for_umount_subtree(struct dentry *dentry) +{ + struct dentry *parent; + + BUG_ON(!IS_ROOT(dentry)); + + /* detach this root from the system */ + spin_lock(&dcache_lock); + if (!list_empty(&dentry->d_lru)) { + dentry_stat.nr_unused--; + list_del_init(&dentry->d_lru); + } + __d_drop(dentry); + spin_unlock(&dcache_lock); + + for (;;) { + /* descend to the first leaf in the current subtree */ + while (!list_empty(&dentry->d_subdirs)) { + struct dentry *loop; + + /* this is a branch with children - detach all of them + * from the system in one go */ + spin_lock(&dcache_lock); + list_for_each_entry(loop, &dentry->d_subdirs, + d_u.d_child) { + if (!list_empty(&loop->d_lru)) { + dentry_stat.nr_unused--; + list_del_init(&loop->d_lru); + } + + __d_drop(loop); + cond_resched_lock(&dcache_lock); + } + spin_unlock(&dcache_lock); + + /* move to the first child */ + dentry = list_entry(dentry->d_subdirs.next, + struct dentry, d_u.d_child); + } + + /* consume the dentries from this leaf up through its parents + * until we find one with children or run out altogether */ + do { + struct inode *inode; + + if (atomic_read(&dentry->d_count) != 0) { + printk(KERN_ERR + "BUG: Dentry %p{i=%lx,n=%s}" + " still in use (%d)" + " [unmount of %s %s]\n", + dentry, + dentry->d_inode ? 
+ dentry->d_inode->i_ino : 0UL, + dentry->d_name.name, + atomic_read(&dentry->d_count), + dentry->d_sb->s_type->name, + dentry->d_sb->s_id); + BUG(); + } + + parent = dentry->d_parent; + if (parent == dentry) + parent = NULL; + else + atomic_dec(&parent->d_count); + + list_del(&dentry->d_u.d_child); + dentry_stat.nr_dentry--; /* For d_free, below */ + + inode = dentry->d_inode; + if (inode) { +#ifdef CONFIG_INOTIFY + BUG_ON(!list_empty(&inode->inotify_watches)); +#endif + dentry->d_inode = NULL; + list_del_init(&dentry->d_alias); + if (dentry->d_op && dentry->d_op->d_iput) + dentry->d_op->d_iput(dentry, inode); + else + iput(inode); + } + + d_free(dentry); + + /* finished when we fall off the top of the tree, + * otherwise we ascend to the parent and move to the + * next sibling if there is one */ + if (!parent) + return; + + dentry = parent; + + } while (list_empty(&dentry->d_subdirs)); + + dentry = list_entry(dentry->d_subdirs.next, + struct dentry, d_u.d_child); + } +} + +/* + * destroy the dentries attached to a superblock on unmounting + * - we don't need to use dentry->d_lock, and only need dcache_lock when + * removing the dentry from the system lists and hashes because: + * - the superblock is detached from all mountings and open files, so the + * dentry trees will not be rearranged by the VFS + * - s_umount is write-locked, so the memory pressure shrinker will ignore + * any dentries belonging to this superblock that it comes across + * - the filesystem itself is no longer permitted to rearrange the dentries + * in this superblock + */ +void shrink_dcache_for_umount(struct super_block *sb) +{ + struct dentry *dentry; + + if (down_read_trylock(&sb->s_umount)) + BUG(); + + dentry = sb->s_root; + sb->s_root = NULL; + atomic_dec(&dentry->d_count); + shrink_dcache_for_umount_subtree(dentry); + + while (!hlist_empty(&sb->s_anon)) { + dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); + shrink_dcache_for_umount_subtree(dentry); + } +} + +/* * Search for at least 1 mount point in the dentry's subdirs. * We descend to the next level whenever the d_subdirs * list is non-empty and continue searching. @@ -828,17 +961,19 @@ void d_instantiate(struct dentry *entry, * (or otherwise set) by the caller to indicate that it is now * in use by the dcache. 
*/ -struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) +static struct dentry *__d_instantiate_unique(struct dentry *entry, + struct inode *inode) { struct dentry *alias; int len = entry->d_name.len; const char *name = entry->d_name.name; unsigned int hash = entry->d_name.hash; - BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); - if (!inode) - goto do_negative; + if (!inode) { + entry->d_inode = NULL; + return NULL; + } + list_for_each_entry(alias, &inode->i_dentry, d_alias) { struct qstr *qstr = &alias->d_name; @@ -851,19 +986,35 @@ struct dentry *d_instantiate_unique(stru if (memcmp(qstr->name, name, len)) continue; dget_locked(alias); - spin_unlock(&dcache_lock); - BUG_ON(!d_unhashed(alias)); - iput(inode); return alias; } + list_add(&entry->d_alias, &inode->i_dentry); -do_negative: entry->d_inode = inode; fsnotify_d_instantiate(entry, inode); - spin_unlock(&dcache_lock); - security_d_instantiate(entry, inode); return NULL; } + +struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) +{ + struct dentry *result; + + BUG_ON(!list_empty(&entry->d_alias)); + + spin_lock(&dcache_lock); + result = __d_instantiate_unique(entry, inode); + spin_unlock(&dcache_lock); + + if (!result) { + security_d_instantiate(entry, inode); + return NULL; + } + + BUG_ON(!d_unhashed(result)); + iput(inode); + return result; +} + EXPORT_SYMBOL(d_instantiate_unique); /** @@ -1235,6 +1386,11 @@ static void __d_rehash(struct dentry * e hlist_add_head_rcu(&entry->d_hash, list); } +static void _d_rehash(struct dentry * entry) +{ + __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash)); +} + /** * d_rehash - add an entry back to the hash * @entry: dentry to add to the hash @@ -1244,11 +1400,9 @@ static void __d_rehash(struct dentry * e void d_rehash(struct dentry * entry) { - struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash); - spin_lock(&dcache_lock); spin_lock(&entry->d_lock); - __d_rehash(entry, list); + _d_rehash(entry); spin_unlock(&entry->d_lock); spin_unlock(&dcache_lock); } @@ -1386,6 +1540,120 @@ already_unhashed: spin_unlock(&dcache_lock); } +/* + * Prepare an anonymous dentry for life in the superblock's dentry tree as a + * named dentry in place of the dentry to be replaced. + */ +static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) +{ + struct dentry *dparent, *aparent; + + switch_names(dentry, anon); + do_switch(dentry->d_name.len, anon->d_name.len); + do_switch(dentry->d_name.hash, anon->d_name.hash); + + dparent = dentry->d_parent; + aparent = anon->d_parent; + + dentry->d_parent = (aparent == anon) ? dentry : aparent; + list_del(&dentry->d_u.d_child); + if (!IS_ROOT(dentry)) + list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + else + INIT_LIST_HEAD(&dentry->d_u.d_child); + + anon->d_parent = (dparent == dentry) ? 
anon : dparent; + list_del(&anon->d_u.d_child); + if (!IS_ROOT(anon)) + list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs); + else + INIT_LIST_HEAD(&anon->d_u.d_child); + + anon->d_flags &= ~DCACHE_DISCONNECTED; +} + +/** + * d_materialise_unique - introduce an inode into the tree + * @dentry: candidate dentry + * @inode: inode to bind to the dentry, to which aliases may be attached + * + * Introduces an dentry into the tree, substituting an extant disconnected + * root directory alias in its place if there is one + */ +struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) +{ + struct dentry *alias, *actual; + + BUG_ON(!d_unhashed(dentry)); + + spin_lock(&dcache_lock); + + if (!inode) { + actual = dentry; + dentry->d_inode = NULL; + goto found_lock; + } + + /* See if a disconnected directory already exists as an anonymous root + * that we should splice into the tree instead */ + if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) { + spin_lock(&alias->d_lock); + + /* Is this a mountpoint that we could splice into our tree? */ + if (IS_ROOT(alias)) + goto connect_mountpoint; + + if (alias->d_name.len == dentry->d_name.len && + alias->d_parent == dentry->d_parent && + memcmp(alias->d_name.name, + dentry->d_name.name, + dentry->d_name.len) == 0) + goto replace_with_alias; + + spin_unlock(&alias->d_lock); + + /* Doh! Seem to be aliasing directories for some reason... */ + dput(alias); + } + + /* Add a unique reference */ + actual = __d_instantiate_unique(dentry, inode); + if (!actual) + actual = dentry; + else if (unlikely(!d_unhashed(actual))) + goto shouldnt_be_hashed; + +found_lock: + spin_lock(&actual->d_lock); +found: + _d_rehash(actual); + spin_unlock(&actual->d_lock); + spin_unlock(&dcache_lock); + + if (actual == dentry) { + security_d_instantiate(dentry, inode); + return NULL; + } + + iput(inode); + return actual; + + /* Convert the anonymous/root alias into an ordinary dentry */ +connect_mountpoint: + __d_materialise_dentry(dentry, alias); + + /* Replace the candidate dentry with the alias in the tree */ +replace_with_alias: + __d_drop(alias); + actual = alias; + goto found; + +shouldnt_be_hashed: + spin_unlock(&dcache_lock); + BUG(); + goto shouldnt_be_hashed; +} + /** * d_path - return the path of a dentry * @dentry: dentry to report @@ -1784,6 +2052,7 @@ EXPORT_SYMBOL(d_instantiate); EXPORT_SYMBOL(d_invalidate); EXPORT_SYMBOL(d_lookup); EXPORT_SYMBOL(d_move); +EXPORT_SYMBOL_GPL(d_materialise_unique); EXPORT_SYMBOL(d_path); EXPORT_SYMBOL(d_prune_aliases); EXPORT_SYMBOL(d_rehash); diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 92ea826..b0e7d9f 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -112,7 +112,7 @@ static void ext2_check_page(struct page if (offs != limit) goto Eend; out: - SetPageChecked(page); + SetPageFsMisc(page); return; /* Too bad, we had an error */ @@ -152,7 +152,7 @@ Eend: dir->i_ino, (page->index<inode)); fail: - SetPageChecked(page); + SetPageFsMisc(page); SetPageError(page); } @@ -165,7 +165,7 @@ static struct page * ext2_get_page(struc kmap(page); if (!PageUptodate(page)) goto fail; - if (!PageChecked(page)) + if (!PageFsMisc(page)) ext2_check_page(page); if (PageError(page)) goto fail; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index f804d5e..8073fc9 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1527,12 +1527,12 @@ static int ext3_journalled_writepage(str goto no_write; } - if (!page_has_buffers(page) || PageChecked(page)) { + if (!page_has_buffers(page) || PageFsMisc(page)) { /* * It's mmapped 
pagecache. Add buffers and journal it. There * doesn't seem much point in redirtying the page here. */ - ClearPageChecked(page); + ClearPageFsMisc(page); ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, ext3_get_block); if (ret != 0) { @@ -1589,7 +1589,7 @@ static void ext3_invalidatepage(struct p * If it's a full truncate we just forget about the pending dirtying */ if (offset == 0) - ClearPageChecked(page); + ClearPageFsMisc(page); journal_invalidatepage(journal, page, offset); } @@ -1598,7 +1598,7 @@ static int ext3_releasepage(struct page { journal_t *journal = EXT3_JOURNAL(page->mapping->host); - WARN_ON(PageChecked(page)); + WARN_ON(PageFsMisc(page)); if (!page_has_buffers(page)) return 0; return journal_try_to_free_buffers(journal, page, wait); @@ -1694,7 +1694,7 @@ out: */ static int ext3_journalled_set_page_dirty(struct page *page) { - SetPageChecked(page); + SetPageFsMisc(page); return __set_page_dirty_nobuffers(page); } diff --git a/fs/fcntl.c b/fs/fcntl.c index d35cbc6..b43d821 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -529,6 +529,8 @@ int send_sigurg(struct fown_struct *fown return ret; } +EXPORT_SYMBOL(send_sigurg); + static DEFINE_RWLOCK(fasync_lock); static kmem_cache_t *fasync_cache __read_mostly; diff --git a/fs/file_table.c b/fs/file_table.c index 0131ba0..6861eb9 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -29,10 +29,13 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; +struct files_kernel_stat_struct files_kernel_stat; + /* public. Not pretty! */ __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); static struct percpu_counter nr_files __cacheline_aligned_in_smp; +static atomic_t nr_kernel_files; static inline void file_free_rcu(struct rcu_head *head) { @@ -42,7 +45,10 @@ static inline void file_free_rcu(struct static inline void file_free(struct file *f) { - percpu_counter_dec(&nr_files); + if (f->f_kernel_flags & FKFLAGS_NO_ENFILE) + atomic_dec(&nr_kernel_files); + else + percpu_counter_dec(&nr_files); call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); } @@ -73,45 +79,64 @@ int proc_nr_files(ctl_table *table, int files_stat.nr_files = get_nr_files(); return proc_dointvec(table, write, filp, buffer, lenp, ppos); } +int proc_files_kernel(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + files_kernel_stat.nr_kernel_files = atomic_read(&nr_kernel_files); + return proc_dointvec(table, write, filp, buffer, lenp, ppos); +} #else int proc_nr_files(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } +int proc_files_kernel(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} #endif /* Find an unused file structure and return a pointer to it. * Returns NULL, if there are no more free file structures or * we run out of memory. */ -struct file *get_empty_filp(void) +struct file *get_empty_kernel_filp(unsigned short kflags) { struct task_struct *tsk; static int old_max; struct file * f; /* - * Privileged users can go above max_files + * Privileged users can go above max_files and internal kernel users + * can avoid it completely */ - if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) { + if (!(kflags & FKFLAGS_NO_ENFILE) && + get_nr_files() >= files_stat.max_files && + !capable(CAP_SYS_ADMIN) + ) { /* - * percpu_counters are inaccurate. Do an expensive check before - * we go and fail. + * percpu_counters are inaccurate. 
Do an expensive + * check before we go and fail. */ if (percpu_counter_sum(&nr_files) >= files_stat.max_files) goto over; } - f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); if (f == NULL) goto fail; - percpu_counter_inc(&nr_files); - memset(f, 0, sizeof(*f)); + if (kflags & FKFLAGS_NO_ENFILE) + atomic_inc(&nr_kernel_files); + else + percpu_counter_inc(&nr_files); + if (security_file_alloc(f)) goto fail_sec; + f->f_kernel_flags = kflags; tsk = current; INIT_LIST_HEAD(&f->f_u.fu_list); atomic_set(&f->f_count, 1); @@ -137,6 +162,11 @@ fail: return NULL; } +struct file *get_empty_filp(void) +{ + return get_empty_kernel_filp(0); +} + EXPORT_SYMBOL(get_empty_filp); void fastcall fput(struct file *file) @@ -234,6 +264,7 @@ struct file fastcall *fget_light(unsigne return file; } +EXPORT_SYMBOL_GPL(fget_light); void put_filp(struct file *file) { diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c index decac62..805bbb2 100644 --- a/fs/freevxfs/vxfs_subr.c +++ b/fs/freevxfs/vxfs_subr.c @@ -78,7 +78,7 @@ vxfs_get_page(struct address_space *mapp kmap(pp); if (!PageUptodate(pp)) goto fail; - /** if (!PageChecked(pp)) **/ + /** if (!PageFsMisc(pp)) **/ /** vxfs_check_page(pp); **/ if (PageError(pp)) goto fail; diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile new file mode 100644 index 0000000..10f17a3 --- /dev/null +++ b/fs/fscache/Makefile @@ -0,0 +1,11 @@ +# +# Makefile for general filesystem caching code +# + +fscache-objs := \ + cookie.o \ + fsdef.o \ + main.o \ + page.o + +obj-$(CONFIG_FSCACHE) := fscache.o diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c new file mode 100644 index 0000000..ea017b7 --- /dev/null +++ b/fs/fscache/cookie.c @@ -0,0 +1,1063 @@ +/* cookie.c: general filesystem cache cookie management + * + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include "fscache-int.h" + +static LIST_HEAD(fscache_cache_tag_list); +static LIST_HEAD(fscache_cache_list); +static LIST_HEAD(fscache_netfs_list); +static DECLARE_RWSEM(fscache_addremove_sem); +static struct fscache_cache_tag fscache_nomem_tag; + +kmem_cache_t *fscache_cookie_jar; + +static void fscache_withdraw_object(struct fscache_cache *cache, + struct fscache_object *object); + +static void __fscache_cookie_put(struct fscache_cookie *cookie); + +static inline void fscache_cookie_put(struct fscache_cookie *cookie) +{ + /* check to see whether the cookie has already been released by looking + * for the poison when slab debugging is on */ +#ifdef CONFIG_DEBUG_SLAB + BUG_ON((atomic_read(&cookie->usage) & 0xffff0000) == 0x6b6b0000); +#endif + + BUG_ON(atomic_read(&cookie->usage) <= 0); + + if (atomic_dec_and_test(&cookie->usage)) + __fscache_cookie_put(cookie); + +} + +/*****************************************************************************/ +/* + * look up a cache tag + */ +struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *name) +{ + struct fscache_cache_tag *tag, *xtag; + + /* firstly check for the existence of the tag under read lock */ + down_read(&fscache_addremove_sem); + + list_for_each_entry(tag, &fscache_cache_tag_list, link) { + if (strcmp(tag->name, name) == 0) { + atomic_inc(&tag->usage); + up_read(&fscache_addremove_sem); + return tag; + } + } + + up_read(&fscache_addremove_sem); + + /* the tag does not exist - create a candidate */ + xtag = kmalloc(sizeof(*xtag) + strlen(name) + 1, GFP_KERNEL); + if (!xtag) + /* return a dummy tag if out of memory */ + return &fscache_nomem_tag; + + atomic_set(&xtag->usage, 1); + strcpy(xtag->name, name); + + /* write lock, search again and add if still not present */ + down_write(&fscache_addremove_sem); + + list_for_each_entry(tag, &fscache_cache_tag_list, link) { + if (strcmp(tag->name, name) == 0) { + atomic_inc(&tag->usage); + up_write(&fscache_addremove_sem); + kfree(xtag); + return tag; + } + } + + list_add_tail(&xtag->link, &fscache_cache_tag_list); + up_write(&fscache_addremove_sem); + return xtag; + +} /* end __fscache_lookup_cache_tag() */ + +/*****************************************************************************/ +/* + * release a reference to a cache tag + */ +void __fscache_release_cache_tag(struct fscache_cache_tag *tag) +{ + if (tag != &fscache_nomem_tag) { + down_write(&fscache_addremove_sem); + + if (atomic_dec_and_test(&tag->usage)) + list_del_init(&tag->link); + else + tag = NULL; + + up_write(&fscache_addremove_sem); + + kfree(tag); + } + +} /* end __fscache_release_cache_tag() */ + +/*****************************************************************************/ +/* + * register a network filesystem for caching + */ +int __fscache_register_netfs(struct fscache_netfs *netfs) +{ + struct fscache_netfs *ptr; + int ret; + + _enter("{%s}", netfs->name); + + INIT_LIST_HEAD(&netfs->link); + + /* allocate a cookie for the primary index */ + netfs->primary_index = + kmem_cache_zalloc(fscache_cookie_jar, SLAB_KERNEL); + + if (!netfs->primary_index) { + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + /* initialise the primary index cookie */ + atomic_set(&netfs->primary_index->usage, 1); + atomic_set(&netfs->primary_index->children, 0); + + netfs->primary_index->def = &fscache_fsdef_netfs_def; + netfs->primary_index->parent = &fscache_fsdef_index; + netfs->primary_index->netfs = netfs; + netfs->primary_index->netfs_data = netfs; + + atomic_inc(&netfs->primary_index->parent->usage); + 
atomic_inc(&netfs->primary_index->parent->children); + + init_rwsem(&netfs->primary_index->sem); + INIT_HLIST_HEAD(&netfs->primary_index->backing_objects); + + /* check the netfs type is not already present */ + down_write(&fscache_addremove_sem); + + ret = -EEXIST; + list_for_each_entry(ptr, &fscache_netfs_list, link) { + if (strcmp(ptr->name, netfs->name) == 0) + goto already_registered; + } + + list_add(&netfs->link, &fscache_netfs_list); + ret = 0; + + printk("FS-Cache: netfs '%s' registered for caching\n", netfs->name); + +already_registered: + up_write(&fscache_addremove_sem); + + if (ret < 0) { + netfs->primary_index->parent = NULL; + __fscache_cookie_put(netfs->primary_index); + netfs->primary_index = NULL; + } + + _leave(" = %d", ret); + return ret; + +} /* end __fscache_register_netfs() */ + +EXPORT_SYMBOL(__fscache_register_netfs); + +/*****************************************************************************/ +/* + * unregister a network filesystem from the cache + * - all cookies must have been released first + */ +void __fscache_unregister_netfs(struct fscache_netfs *netfs) +{ + _enter("{%s.%u}", netfs->name, netfs->version); + + down_write(&fscache_addremove_sem); + + list_del(&netfs->link); + fscache_relinquish_cookie(netfs->primary_index, 0); + + up_write(&fscache_addremove_sem); + + printk("FS-Cache: netfs '%s' unregistered from caching\n", + netfs->name); + + _leave(""); + +} /* end __fscache_unregister_netfs() */ + +EXPORT_SYMBOL(__fscache_unregister_netfs); + +/*****************************************************************************/ +/* + * initialise a cache record + */ +void fscache_init_cache(struct fscache_cache *cache, + struct fscache_cache_ops *ops, + const char *idfmt, + ...) +{ + va_list va; + + memset(cache, 0, sizeof(*cache)); + + cache->ops = ops; + + va_start(va, idfmt); + vsnprintf(cache->identifier, sizeof(cache->identifier), idfmt, va); + va_end(va); + + INIT_LIST_HEAD(&cache->link); + INIT_LIST_HEAD(&cache->object_list); + spin_lock_init(&cache->object_list_lock); + init_rwsem(&cache->withdrawal_sem); + +} /* end fscache_init_cache() */ + +EXPORT_SYMBOL(fscache_init_cache); + +/*****************************************************************************/ +/* + * declare a mounted cache as being open for business + */ +int fscache_add_cache(struct fscache_cache *cache, + struct fscache_object *ifsdef, + const char *tagname) +{ + struct fscache_cache_tag *tag; + + BUG_ON(!cache->ops); + BUG_ON(!ifsdef); + + cache->flags = 0; + + if (!tagname) + tagname = cache->identifier; + + BUG_ON(!tagname[0]); + + _enter("{%s.%s},,%s", cache->ops->name, cache->identifier, tagname); + + if (!cache->ops->grab_object(ifsdef)) + BUG(); + + ifsdef->cookie = &fscache_fsdef_index; + ifsdef->cache = cache; + cache->fsdef = ifsdef; + + down_write(&fscache_addremove_sem); + + /* instantiate or allocate a cache tag */ + list_for_each_entry(tag, &fscache_cache_tag_list, link) { + if (strcmp(tag->name, tagname) == 0) { + if (tag->cache) { + printk(KERN_ERR + "FS-Cache: cache tag '%s' already in use\n", + tagname); + up_write(&fscache_addremove_sem); + return -EEXIST; + } + + atomic_inc(&tag->usage); + goto found_cache_tag; + } + } + + tag = kmalloc(sizeof(*tag) + strlen(tagname) + 1, GFP_KERNEL); + if (!tag) { + up_write(&fscache_addremove_sem); + return -ENOMEM; + } + + atomic_set(&tag->usage, 1); + strcpy(tag->name, tagname); + list_add_tail(&tag->link, &fscache_cache_tag_list); + +found_cache_tag: + tag->cache = cache; + cache->tag = tag; + + /* add the cache to the 
list */ + list_add(&cache->link, &fscache_cache_list); + + /* add the cache's netfs definition index object to the cache's + * list */ + spin_lock(&cache->object_list_lock); + list_add_tail(&ifsdef->cache_link, &cache->object_list); + spin_unlock(&cache->object_list_lock); + + /* add the cache's netfs definition index object to the top level index + * cookie as a known backing object */ + down_write(&fscache_fsdef_index.sem); + + hlist_add_head(&ifsdef->cookie_link, + &fscache_fsdef_index.backing_objects); + + atomic_inc(&fscache_fsdef_index.usage); + + /* done */ + up_write(&fscache_fsdef_index.sem); + up_write(&fscache_addremove_sem); + + printk(KERN_NOTICE + "FS-Cache: Cache \"%s\" added (type %s)\n", + cache->tag->name, cache->ops->name); + + _leave(" = 0 [%s]", cache->identifier); + return 0; + +} /* end fscache_add_cache() */ + +EXPORT_SYMBOL(fscache_add_cache); + +/*****************************************************************************/ +/* + * note a cache I/O error + */ +void fscache_io_error(struct fscache_cache *cache) +{ + set_bit(FSCACHE_IOERROR, &cache->flags); + + printk(KERN_ERR "FS-Cache: Cache %s stopped due to I/O error\n", + cache->ops->name); + +} /* end fscache_io_error() */ + +EXPORT_SYMBOL(fscache_io_error); + +/*****************************************************************************/ +/* + * withdraw an unmounted cache from the active service + */ +void fscache_withdraw_cache(struct fscache_cache *cache) +{ + struct fscache_object *object; + + _enter(""); + + printk(KERN_NOTICE + "FS-Cache: Withdrawing cache \"%s\"\n", + cache->tag->name); + + /* make the cache unavailable for cookie acquisition */ + down_write(&cache->withdrawal_sem); + + down_write(&fscache_addremove_sem); + list_del_init(&cache->link); + cache->tag->cache = NULL; + up_write(&fscache_addremove_sem); + + /* mark all objects as being withdrawn */ + spin_lock(&cache->object_list_lock); + list_for_each_entry(object, &cache->object_list, cache_link) { + set_bit(FSCACHE_OBJECT_WITHDRAWN, &object->flags); + } + spin_unlock(&cache->object_list_lock); + + /* make sure all pages pinned by operations on behalf of the netfs are + * written to disc */ + cache->ops->sync_cache(cache); + + /* dissociate all the netfs pages backed by this cache from the block + * mappings in the cache */ + cache->ops->dissociate_pages(cache); + + /* we now have to destroy all the active objects pertaining to this + * cache */ + spin_lock(&cache->object_list_lock); + + while (!list_empty(&cache->object_list)) { + object = list_entry(cache->object_list.next, + struct fscache_object, cache_link); + list_del_init(&object->cache_link); + spin_unlock(&cache->object_list_lock); + + _debug("withdraw %p", object->cookie); + + /* we've extracted an active object from the tree - now dispose + * of it */ + fscache_withdraw_object(cache, object); + + spin_lock(&cache->object_list_lock); + } + + spin_unlock(&cache->object_list_lock); + + fscache_release_cache_tag(cache->tag); + cache->tag = NULL; + + _leave(""); + +} /* end fscache_withdraw_cache() */ + +EXPORT_SYMBOL(fscache_withdraw_cache); + +/*****************************************************************************/ +/* + * withdraw an object from active service at the behest of the cache + * - need break the links to a cached object cookie + * - called under two situations: + * (1) recycler decides to reclaim an in-use object + * (2) a cache is unmounted + * - have to take care as the cookie can be being relinquished by the netfs + * simultaneously + * - the active object 
is pinned by the caller holding a refcount on it + */ +static void fscache_withdraw_object(struct fscache_cache *cache, + struct fscache_object *object) +{ + struct fscache_cookie *cookie, *xcookie = NULL; + + _enter(",%p", object); + + /* first of all we have to break the links between the object and the + * cookie + * - we have to hold both semaphores BUT we have to get the cookie sem + * FIRST + */ + cache->ops->lock_object(object); + + cookie = object->cookie; + if (cookie) { + /* pin the cookie so that is doesn't escape */ + atomic_inc(&cookie->usage); + + /* re-order the locks to avoid deadlock */ + cache->ops->unlock_object(object); + down_write(&cookie->sem); + cache->ops->lock_object(object); + + /* erase references from the object to the cookie */ + hlist_del_init(&object->cookie_link); + + xcookie = object->cookie; + object->cookie = NULL; + + up_write(&cookie->sem); + } + + cache->ops->unlock_object(object); + + /* we've broken the links between cookie and object */ + if (xcookie) { + fscache_cookie_put(xcookie); + cache->ops->put_object(object); + } + + /* unpin the cookie */ + if (cookie) { + if (cookie->def && cookie->def->now_uncached) + cookie->def->now_uncached(cookie->netfs_data); + fscache_cookie_put(cookie); + } + + _leave(""); + +} /* end fscache_withdraw_object() */ + +/*****************************************************************************/ +/* + * select a cache on which to store an object + * - the cache addremove semaphore must be at least read-locked by the caller + * - the object will never be an index + */ +static struct fscache_cache *fscache_select_cache_for_object(struct fscache_cookie *cookie) +{ + struct fscache_cache_tag *tag; + struct fscache_object *object; + struct fscache_cache *cache; + + _enter(""); + + if (list_empty(&fscache_cache_list)) { + _leave(" = NULL [no cache]"); + return NULL; + } + + /* we check the parent to determine the cache to use */ + down_read(&cookie->parent->sem); + + /* the first in the parent's backing list should be the preferred + * cache */ + if (!hlist_empty(&cookie->parent->backing_objects)) { + object = hlist_entry(cookie->parent->backing_objects.first, + struct fscache_object, cookie_link); + + cache = object->cache; + if (test_bit(FSCACHE_IOERROR, &cache->flags)) + cache = NULL; + + up_read(&cookie->parent->sem); + _leave(" = %p [parent]", cache); + return cache; + } + + /* the parent is unbacked */ + if (cookie->parent->def->type != FSCACHE_COOKIE_TYPE_INDEX) { + /* parent not an index and is unbacked */ + up_read(&cookie->parent->sem); + _leave(" = NULL [parent ubni]"); + return NULL; + } + + up_read(&cookie->parent->sem); + + if (!cookie->parent->def->select_cache) + goto no_preference; + + /* ask the netfs for its preference */ + tag = cookie->parent->def->select_cache( + cookie->parent->parent->netfs_data, + cookie->parent->netfs_data); + + if (!tag) + goto no_preference; + + if (tag == &fscache_nomem_tag) { + _leave(" = NULL [nomem tag]"); + return NULL; + } + + if (!tag->cache) { + _leave(" = NULL [unbacked tag]"); + return NULL; + } + + if (test_bit(FSCACHE_IOERROR, &tag->cache->flags)) + return NULL; + + _leave(" = %p [specific]", tag->cache); + return tag->cache; + +no_preference: + /* netfs has no preference - just select first cache */ + cache = list_entry(fscache_cache_list.next, + struct fscache_cache, link); + _leave(" = %p [first]", cache); + return cache; + +} /* end fscache_select_cache_for_object() */ + +/*****************************************************************************/ +/* + * 
get a backing object for a cookie from the chosen cache + * - the cookie must be write-locked by the caller + * - all parent indexes will be obtained recursively first + */ +static struct fscache_object *fscache_lookup_object(struct fscache_cookie *cookie, + struct fscache_cache *cache) +{ + struct fscache_cookie *parent = cookie->parent; + struct fscache_object *pobject, *object; + struct hlist_node *_p; + + _enter("{%s/%s},", + parent && parent->def ? parent->def->name : "", + cookie->def ? (char *) cookie->def->name : ""); + + if (test_bit(FSCACHE_IOERROR, &cache->flags)) + return NULL; + + /* see if we have the backing object for this cookie + cache immediately + * to hand + */ + object = NULL; + hlist_for_each_entry(object, _p, + &cookie->backing_objects, cookie_link + ) { + if (object->cache == cache) + break; + } + + if (object) { + _leave(" = %p [old]", object); + return object; + } + + BUG_ON(!parent); /* FSDEF entries don't have a parent */ + + /* we don't have a backing cookie, so we need to consult the object's + * parent index in the selected cache and maybe insert an entry + * therein; so the first thing to do is make sure that the parent index + * is represented on disc + */ + down_read(&parent->sem); + + pobject = NULL; + hlist_for_each_entry(pobject, _p, + &parent->backing_objects, cookie_link + ) { + if (pobject->cache == cache) + break; + } + + if (!pobject) { + /* we don't know about the parent object */ + up_read(&parent->sem); + down_write(&parent->sem); + + pobject = fscache_lookup_object(parent, cache); + if (IS_ERR(pobject)) { + up_write(&parent->sem); + _leave(" = %ld [no ipobj]", PTR_ERR(pobject)); + return pobject; + } + + _debug("pobject=%p", pobject); + + BUG_ON(pobject->cookie != parent); + + downgrade_write(&parent->sem); + } + + /* now we can attempt to look up this object in the parent, possibly + * creating a representation on disc when we do so + */ + object = cache->ops->lookup_object(cache, pobject, cookie); + up_read(&parent->sem); + + if (IS_ERR(object)) { + _leave(" = %ld [no obj]", PTR_ERR(object)); + return object; + } + + /* keep track of it */ + cache->ops->lock_object(object); + + BUG_ON(!hlist_unhashed(&object->cookie_link)); + + /* attach to the cache's object list */ + if (list_empty(&object->cache_link)) { + spin_lock(&cache->object_list_lock); + list_add(&object->cache_link, &cache->object_list); + spin_unlock(&cache->object_list_lock); + } + + /* attach to the cookie */ + object->cookie = cookie; + atomic_inc(&cookie->usage); + hlist_add_head(&object->cookie_link, &cookie->backing_objects); + + /* done */ + cache->ops->unlock_object(object); + _leave(" = %p [new]", object); + return object; + +} /* end fscache_lookup_object() */ + +/*****************************************************************************/ +/* + * request a cookie to represent an object (index, datafile, xattr, etc) + * - parent specifies the parent object + * - the top level index cookie for each netfs is stored in the fscache_netfs + * struct upon registration + * - idef points to the definition + * - the netfs_data will be passed to the functions pointed to in *def + * - all attached caches will be searched to see if they contain this object + * - index objects aren't stored on disk until there's a dependent file that + * needs storing + * - other objects are stored in a selected cache immediately, and all the + * indexes forming the path to it are instantiated if necessary + * - we never let on to the netfs about errors + * - we may set a negative cookie pointer, 
but that's okay + */ +struct fscache_cookie *__fscache_acquire_cookie(struct fscache_cookie *parent, + struct fscache_cookie_def *def, + void *netfs_data) +{ + struct fscache_cookie *cookie; + struct fscache_cache *cache; + struct fscache_object *object; + int ret = 0; + + BUG_ON(!def); + + _enter("{%s},{%s},%p", + parent ? (char *) parent->def->name : "", + def->name, netfs_data); + + /* if there's no parent cookie, then we don't create one here either */ + if (!parent) { + _leave(" [no parent]"); + return NULL; + } + + /* validate the definition */ + BUG_ON(!def->get_key); + BUG_ON(!def->name[0]); + + BUG_ON(def->type == FSCACHE_COOKIE_TYPE_INDEX && + parent->def->type != FSCACHE_COOKIE_TYPE_INDEX); + + /* allocate and initialise a cookie */ + cookie = kmem_cache_alloc(fscache_cookie_jar, SLAB_KERNEL); + if (!cookie) { + _leave(" [ENOMEM]"); + return NULL; + } + + atomic_set(&cookie->usage, 1); + atomic_set(&cookie->children, 0); + + atomic_inc(&parent->usage); + atomic_inc(&parent->children); + + cookie->def = def; + cookie->parent = parent; + cookie->netfs = parent->netfs; + cookie->netfs_data = netfs_data; + + /* now we need to see whether the backing objects for this cookie yet + * exist, if not there'll be nothing to search */ + down_read(&fscache_addremove_sem); + + if (list_empty(&fscache_cache_list)) { + up_read(&fscache_addremove_sem); + _leave(" = %p [no caches]", cookie); + return cookie; + } + + /* if the object is an index then we need do nothing more here - we + * create indexes on disk when we need them as an index may exist in + * multiple caches */ + if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { + down_write(&cookie->sem); + + /* the object is a file - we need to select a cache in which to + * store it */ + cache = fscache_select_cache_for_object(cookie); + if (!cache) + goto no_cache; /* couldn't decide on a cache */ + + /* create a file index entry on disc, along with all the + * indexes required to find it again later */ + object = fscache_lookup_object(cookie, cache); + if (IS_ERR(object)) { + ret = PTR_ERR(object); + goto error; + } + + up_write(&cookie->sem); + } +out: + up_read(&fscache_addremove_sem); + _leave(" = %p", cookie); + return cookie; + +no_cache: + ret = -ENOMEDIUM; + goto error_cleanup; +error: + printk(KERN_ERR "FS-Cache: error from cache: %d\n", ret); +error_cleanup: + if (cookie) { + up_write(&cookie->sem); + __fscache_cookie_put(cookie); + cookie = NULL; + atomic_dec(&parent->children); + } + + goto out; + +} /* end __fscache_acquire_cookie() */ + +EXPORT_SYMBOL(__fscache_acquire_cookie); + +/*****************************************************************************/ +/* + * release a cookie back to the cache + * - the object will be marked as recyclable on disc if retire is true + * - all dependents of this cookie must have already been unregistered + * (indexes/files/pages) + */ +void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) +{ + struct fscache_cache *cache; + struct fscache_object *object; + struct hlist_node *_p; + + if (!cookie) { + _leave(" [no cookie]"); + return; + } + + _enter("%p{%s},%d", cookie, cookie->def->name, retire); + + if (atomic_read(&cookie->children) != 0) { + printk("FS-Cache: cookie still has children\n"); + BUG(); + } + + /* detach pointers back to the netfs */ + down_write(&cookie->sem); + + cookie->netfs_data = NULL; + cookie->def = NULL; + + /* mark retired objects for recycling */ + if (retire) { + hlist_for_each_entry(object, _p, + &cookie->backing_objects, + cookie_link + ) { 
+ set_bit(FSCACHE_OBJECT_RECYCLING, &object->flags); + } + } + + /* break links with all the active objects */ + while (!hlist_empty(&cookie->backing_objects)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, + cookie_link); + + /* detach each cache object from the object cookie */ + set_bit(FSCACHE_OBJECT_RELEASING, &object->flags); + + hlist_del_init(&object->cookie_link); + + cache = object->cache; + cache->ops->lock_object(object); + object->cookie = NULL; + cache->ops->unlock_object(object); + + if (atomic_dec_and_test(&cookie->usage)) + /* the cookie refcount shouldn't be reduced to 0 yet */ + BUG(); + + spin_lock(&cache->object_list_lock); + list_del_init(&object->cache_link); + spin_unlock(&cache->object_list_lock); + + cache->ops->put_object(object); + } + + up_write(&cookie->sem); + + if (cookie->parent) { +#ifdef CONFIG_DEBUG_SLAB + BUG_ON((atomic_read(&cookie->parent->children) & 0xffff0000) == 0x6b6b0000); +#endif + atomic_dec(&cookie->parent->children); + } + + /* finally dispose of the cookie */ + fscache_cookie_put(cookie); + + _leave(""); + +} /* end __fscache_relinquish_cookie() */ + +EXPORT_SYMBOL(__fscache_relinquish_cookie); + +/*****************************************************************************/ +/* + * update the index entries backing a cookie + */ +void __fscache_update_cookie(struct fscache_cookie *cookie) +{ + struct fscache_object *object; + struct hlist_node *_p; + + if (!cookie) { + _leave(" [no cookie]"); + return; + } + + _enter("{%s}", cookie->def->name); + + BUG_ON(!cookie->def->get_aux); + + down_write(&cookie->sem); + down_read(&cookie->parent->sem); + + /* update the index entry on disc in each cache backing this cookie */ + hlist_for_each_entry(object, _p, + &cookie->backing_objects, cookie_link + ) { + if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) + object->cache->ops->update_object(object); + } + + up_read(&cookie->parent->sem); + up_write(&cookie->sem); + _leave(""); + +} /* end __fscache_update_cookie() */ + +EXPORT_SYMBOL(__fscache_update_cookie); + +/*****************************************************************************/ +/* + * destroy a cookie + */ +static void __fscache_cookie_put(struct fscache_cookie *cookie) +{ + struct fscache_cookie *parent; + + _enter("%p", cookie); + + for (;;) { + parent = cookie->parent; + BUG_ON(!hlist_empty(&cookie->backing_objects)); + kmem_cache_free(fscache_cookie_jar, cookie); + + if (!parent) + break; + + cookie = parent; + BUG_ON(atomic_read(&cookie->usage) <= 0); + if (!atomic_dec_and_test(&cookie->usage)) + break; + } + + _leave(""); + +} /* end __fscache_cookie_put() */ + +/*****************************************************************************/ +/* + * initialise an cookie jar slab element prior to any use + */ +void fscache_cookie_init_once(void *_cookie, kmem_cache_t *cachep, + unsigned long flags) +{ + struct fscache_cookie *cookie = _cookie; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) { + memset(cookie, 0, sizeof(*cookie)); + init_rwsem(&cookie->sem); + INIT_HLIST_HEAD(&cookie->backing_objects); + } + +} /* end fscache_cookie_init_once() */ + +/*****************************************************************************/ +/* + * pin an object into the cache + */ +int __fscache_pin_cookie(struct fscache_cookie *cookie) +{ + struct fscache_object *object; + int ret; + + _enter("%p", cookie); + + if (hlist_empty(&cookie->backing_objects)) { + _leave(" = -ENOBUFS"); + return -ENOBUFS; + } + + /* not 
supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + /* prevent the file from being uncached whilst we access it and exclude + * read and write attempts on pages + */ + down_write(&cookie->sem); + + ret = -ENOBUFS; + if (!hlist_empty(&cookie->backing_objects)) { + /* get and pin the backing object */ + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto out; + + if (!object->cache->ops->pin_object) { + ret = -EOPNOTSUPP; + goto out; + } + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + if (object->cache->ops->grab_object(object)) { + /* ask the cache to honour the operation */ + ret = object->cache->ops->pin_object(object); + + object->cache->ops->put_object(object); + } + + fscache_operation_unlock(object); + } + } + +out: + up_write(&cookie->sem); + _leave(" = %d", ret); + return ret; + +} /* end __fscache_pin_cookie() */ + +EXPORT_SYMBOL(__fscache_pin_cookie); + +/*****************************************************************************/ +/* + * unpin an object into the cache + */ +void __fscache_unpin_cookie(struct fscache_cookie *cookie) +{ + struct fscache_object *object; + int ret; + + _enter("%p", cookie); + + if (hlist_empty(&cookie->backing_objects)) { + _leave(" [no obj]"); + return; + } + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + /* prevent the file from being uncached whilst we access it and exclude + * read and write attempts on pages + */ + down_write(&cookie->sem); + + ret = -ENOBUFS; + if (!hlist_empty(&cookie->backing_objects)) { + /* get and unpin the backing object */ + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto out; + + if (!object->cache->ops->unpin_object) + goto out; + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + if (object->cache->ops->grab_object(object)) { + /* ask the cache to honour the operation */ + object->cache->ops->unpin_object(object); + + object->cache->ops->put_object(object); + } + + fscache_operation_unlock(object); + } + } + +out: + up_write(&cookie->sem); + _leave(""); + +} /* end __fscache_unpin_cookie() */ + +EXPORT_SYMBOL(__fscache_unpin_cookie); diff --git a/fs/fscache/fscache-int.h b/fs/fscache/fscache-int.h new file mode 100644 index 0000000..d075660 --- /dev/null +++ b/fs/fscache/fscache-int.h @@ -0,0 +1,93 @@ +/* fscache-int.h: internal definitions + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _FSCACHE_INT_H +#define _FSCACHE_INT_H + +#include +#include +#include + +extern kmem_cache_t *fscache_cookie_jar; + +extern struct fscache_cookie fscache_fsdef_index; +extern struct fscache_cookie_def fscache_fsdef_netfs_def; + +extern void fscache_cookie_init_once(void *_cookie, kmem_cache_t *cachep, unsigned long flags); + +/* + * prevent the cache from being withdrawn whilst an operation is in progress + * - returns false if the cache is being withdrawn already or if the cache is + * waiting to withdraw itself + * - returns true if the cache was not being withdrawn + * - fscache_withdraw_cache() will wait using down_write() until all ops are + * complete + */ +static inline int fscache_operation_lock(struct fscache_object *object) +{ + return down_read_trylock(&object->cache->withdrawal_sem); +} + +/* + * release the operation lock + */ +static inline void fscache_operation_unlock(struct fscache_object *object) +{ + up_read(&object->cache->withdrawal_sem); +} + + +/*****************************************************************************/ +/* + * debug tracing + */ +#define dbgprintk(FMT,...) \ + printk("[%-6.6s] "FMT"\n",current->comm ,##__VA_ARGS__) +#define _dbprintk(FMT,...) do { } while(0) + +#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) +#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) +#define kdebug(FMT,...) dbgprintk(FMT ,##__VA_ARGS__) + +#define kjournal(FMT,...) _dbprintk(FMT ,##__VA_ARGS__) + +#define dbgfree(ADDR) _dbprintk("%p:%d: FREEING %p",__FILE__,__LINE__,ADDR) + +#define dbgpgalloc(PAGE) \ +do { \ + _dbprintk("PGALLOC %s:%d: %p {%lx,%lu}\n", \ + __FILE__,__LINE__, \ + (PAGE),(PAGE)->mapping->host->i_ino,(PAGE)->index \ + ); \ +} while(0) + +#define dbgpgfree(PAGE) \ +do { \ + if ((PAGE)) \ + _dbprintk("PGFREE %s:%d: %p {%lx,%lu}\n", \ + __FILE__,__LINE__, \ + (PAGE), \ + (PAGE)->mapping->host->i_ino, \ + (PAGE)->index \ + ); \ +} while(0) + +#ifdef __KDEBUG +#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__) +#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__) +#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__) +#else +#define _enter(FMT,...) do { } while(0) +#define _leave(FMT,...) do { } while(0) +#define _debug(FMT,...) do { } while(0) +#endif + +#endif /* _FSCACHE_INT_H */ diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c new file mode 100644 index 0000000..75a0d45 --- /dev/null +++ b/fs/fscache/fsdef.c @@ -0,0 +1,113 @@ +/* fsdef.c: filesystem index definition + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include "fscache-int.h" + +static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax); + +static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax); + +static fscache_checkaux_t fscache_fsdef_netfs_check_aux(void *cookie_netfs_data, + const void *data, + uint16_t datalen); + +struct fscache_cookie_def fscache_fsdef_netfs_def = { + .name = "FSDEF.netfs", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = fscache_fsdef_netfs_get_key, + .get_aux = fscache_fsdef_netfs_get_aux, + .check_aux = fscache_fsdef_netfs_check_aux, +}; + +struct fscache_cookie fscache_fsdef_index = { + .usage = ATOMIC_INIT(1), + .def = NULL, + .sem = __RWSEM_INITIALIZER(fscache_fsdef_index.sem), + .backing_objects = HLIST_HEAD_INIT, +}; + +EXPORT_SYMBOL(fscache_fsdef_index); + +/*****************************************************************************/ +/* + * get the key data for an FSDEF index record + */ +static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct fscache_netfs *netfs = cookie_netfs_data; + unsigned klen; + + _enter("{%s.%u},", netfs->name, netfs->version); + + klen = strlen(netfs->name); + if (klen > bufmax) + return 0; + + memcpy(buffer, netfs->name, klen); + return klen; + +} /* end fscache_fsdef_netfs_get_key() */ + +/*****************************************************************************/ +/* + * get the auxilliary data for an FSDEF index record + */ +static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct fscache_netfs *netfs = cookie_netfs_data; + unsigned dlen; + + _enter("{%s.%u},", netfs->name, netfs->version); + + dlen = sizeof(uint32_t); + if (dlen > bufmax) + return 0; + + memcpy(buffer, &netfs->version, dlen); + return dlen; + +} /* end fscache_fsdef_netfs_get_aux() */ + +/*****************************************************************************/ +/* + * check that the version stored in the auxilliary data is correct + */ +static fscache_checkaux_t fscache_fsdef_netfs_check_aux(void *cookie_netfs_data, + const void *data, + uint16_t datalen) +{ + struct fscache_netfs *netfs = cookie_netfs_data; + uint32_t version; + + _enter("{%s},,%hu", netfs->name, datalen); + + if (datalen != sizeof(version)) { + _leave(" = OBSOLETE [dl=%d v=%d]", + datalen, sizeof(version)); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + memcpy(&version, data, sizeof(version)); + if (version != netfs->version) { + _leave(" = OBSOLETE [ver=%x net=%x]", + version, netfs->version); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + _leave(" = OKAY"); + return FSCACHE_CHECKAUX_OKAY; + +} /* end fscache_fsdef_netfs_check_aux() */ diff --git a/fs/fscache/main.c b/fs/fscache/main.c new file mode 100644 index 0000000..4697917 --- /dev/null +++ b/fs/fscache/main.c @@ -0,0 +1,105 @@ +/* main.c: general filesystem caching manager + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include "fscache-int.h" + +int fscache_debug; + +static int fscache_init(void); +static void fscache_exit(void); + +fs_initcall(fscache_init); +module_exit(fscache_exit); + +MODULE_DESCRIPTION("FS Cache Manager"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +static void fscache_ktype_release(struct kobject *kobject); + +static struct sysfs_ops fscache_sysfs_ops = { + .show = NULL, + .store = NULL, +}; + +static struct kobj_type fscache_ktype = { + .release = fscache_ktype_release, + .sysfs_ops = &fscache_sysfs_ops, + .default_attrs = NULL, +}; + +struct kset fscache_kset = { + .kobj.name = "fscache", + .kobj.kset = &fs_subsys.kset, + .ktype = &fscache_ktype, +}; + +EXPORT_SYMBOL(fscache_kset); + +/*****************************************************************************/ +/* + * initialise the fs caching module + */ +static int __init fscache_init(void) +{ + int ret; + + fscache_cookie_jar = + kmem_cache_create("fscache_cookie_jar", + sizeof(struct fscache_cookie), + 0, + 0, + fscache_cookie_init_once, + NULL); + + if (!fscache_cookie_jar) { + printk(KERN_NOTICE + "FS-Cache: Failed to allocate a cookie jar\n"); + return -ENOMEM; + } + + ret = kset_register(&fscache_kset); + if (ret < 0) { + kmem_cache_destroy(fscache_cookie_jar); + return ret; + } + + printk(KERN_NOTICE "FS-Cache: Loaded\n"); + return 0; + +} /* end fscache_init() */ + +/*****************************************************************************/ +/* + * clean up on module removal + */ +static void __exit fscache_exit(void) +{ + _enter(""); + + kset_unregister(&fscache_kset); + kmem_cache_destroy(fscache_cookie_jar); + printk(KERN_NOTICE "FS-Cache: unloaded\n"); + +} /* end fscache_exit() */ + +/*****************************************************************************/ +/* + * release the ktype + */ +static void fscache_ktype_release(struct kobject *kobject) +{ +} /* end fscache_ktype_release() */ diff --git a/fs/fscache/page.c b/fs/fscache/page.c new file mode 100644 index 0000000..8d8ade5 --- /dev/null +++ b/fs/fscache/page.c @@ -0,0 +1,548 @@ +/* page.c: general filesystem cache cookie management + * + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include "fscache-int.h" + +/*****************************************************************************/ +/* + * set the data file size on an object in the cache + */ +int __fscache_set_i_size(struct fscache_cookie *cookie, loff_t i_size) +{ + struct fscache_object *object; + int ret; + + _enter("%p,%llu,", cookie, i_size); + + if (hlist_empty(&cookie->backing_objects)) { + _leave(" = -ENOBUFS"); + return -ENOBUFS; + } + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + /* prevent the file from being uncached whilst we access it and exclude + * read and write attempts on pages + */ + down_write(&cookie->sem); + + ret = -ENOBUFS; + if (!hlist_empty(&cookie->backing_objects)) { + /* get and pin the backing object */ + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto out; + + /* prevent the cache from being withdrawn */ + if (object->cache->ops->set_i_size && + fscache_operation_lock(object) + ) { + if (object->cache->ops->grab_object(object)) { + /* ask the cache to honour the operation */ + ret = object->cache->ops->set_i_size(object, + i_size); + + object->cache->ops->put_object(object); + } + + fscache_operation_unlock(object); + } + } + +out: + up_write(&cookie->sem); + _leave(" = %d", ret); + return ret; + +} /* end __fscache_set_i_size() */ + +EXPORT_SYMBOL(__fscache_set_i_size); + +/*****************************************************************************/ +/* + * reserve space for an object + */ +int __fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) +{ + struct fscache_object *object; + int ret; + + _enter("%p,%llu,", cookie, size); + + if (hlist_empty(&cookie->backing_objects)) { + _leave(" = -ENOBUFS"); + return -ENOBUFS; + } + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + /* prevent the file from being uncached whilst we access it and exclude + * read and write attempts on pages + */ + down_write(&cookie->sem); + + ret = -ENOBUFS; + if (!hlist_empty(&cookie->backing_objects)) { + /* get and pin the backing object */ + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto out; + + if (!object->cache->ops->reserve_space) { + ret = -EOPNOTSUPP; + goto out; + } + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + if (object->cache->ops->grab_object(object)) { + /* ask the cache to honour the operation */ + ret = object->cache->ops->reserve_space(object, + size); + + object->cache->ops->put_object(object); + } + + fscache_operation_unlock(object); + } + } + +out: + up_write(&cookie->sem); + _leave(" = %d", ret); + return ret; + +} /* end __fscache_reserve_space() */ + +EXPORT_SYMBOL(__fscache_reserve_space); + +/*****************************************************************************/ +/* + * read a page from the cache or allocate a block in which to store it + * - we return: + * -ENOMEM - out of memory, nothing done + * -EINTR - interrupted + * -ENOBUFS - no backing object available in which to cache the block + * -ENODATA - no data available in the backing object for this block + * 0 - dispatched a read - it'll call end_io_func() when finished + */ +int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, + struct page *page, + 
				 fscache_rw_complete_t end_io_func,
+				 void *context,
+				 gfp_t gfp)
+{
+	struct fscache_object *object;
+	int ret;
+
+	_enter("%p,{%lu},", cookie, page->index);
+
+	if (hlist_empty(&cookie->backing_objects)) {
+		_leave(" -ENOBUFS [no backing objects]");
+		return -ENOBUFS;
+	}
+
+	/* not supposed to use this for indexes */
+	BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX);
+
+	/* prevent the file from being uncached whilst we access it */
+	down_read(&cookie->sem);
+
+	ret = -ENOBUFS;
+	if (!hlist_empty(&cookie->backing_objects)) {
+		/* get and pin the backing object */
+		object = hlist_entry(cookie->backing_objects.first,
+				     struct fscache_object, cookie_link);
+
+		if (test_bit(FSCACHE_IOERROR, &object->cache->flags))
+			goto out;
+
+		/* prevent the cache from being withdrawn */
+		if (fscache_operation_lock(object)) {
+			if (object->cache->ops->grab_object(object)) {
+				/* ask the cache to honour the operation */
+				ret = object->cache->ops->read_or_alloc_page(
+					object,
+					page,
+					end_io_func,
+					context,
+					gfp);
+
+				object->cache->ops->put_object(object);
+			}
+
+			fscache_operation_unlock(object);
+		}
+	}
+
+out:
+	up_read(&cookie->sem);
+	_leave(" = %d", ret);
+	return ret;
+
+} /* end __fscache_read_or_alloc_page() */
+
+EXPORT_SYMBOL(__fscache_read_or_alloc_page);
+
+/*****************************************************************************/
+/*
+ * read a list of pages from the cache or allocate blocks in which to store
+ * them
+ * - we return:
+ *   -ENOMEM	- out of memory, some pages may be being read
+ *   -EINTR	- interrupted, some pages may be being read
+ *   -ENOBUFS	- no backing object or space available in which to cache any
+ *		  pages not being read
+ *   -ENODATA	- no data available in the backing object for some or all of
+ *		  the pages
+ *   0		- dispatched a read on all pages
+ *
+ * end_io_func() will be called for each page read from the cache as it
+ * finishes being read
+ *
+ * any pages for which a read is dispatched will be removed from pages and
+ * nr_pages
+ */
+int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
+				  struct address_space *mapping,
+				  struct list_head *pages,
+				  unsigned *nr_pages,
+				  fscache_rw_complete_t end_io_func,
+				  void *context,
+				  gfp_t gfp)
+{
+	struct fscache_object *object;
+	int ret;
+
+	_enter("%p,,%d,,,", cookie, *nr_pages);
+
+	if (hlist_empty(&cookie->backing_objects)) {
+		_leave(" -ENOBUFS [no backing objects]");
+		return -ENOBUFS;
+	}
+
+	/* not supposed to use this for indexes */
+	BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX);
+	BUG_ON(list_empty(pages));
+	BUG_ON(*nr_pages <= 0);
+
+	/* prevent the file from being uncached whilst we access it */
+	down_read(&cookie->sem);
+
+	ret = -ENOBUFS;
+	if (!hlist_empty(&cookie->backing_objects)) {
+		/* get and pin the backing object */
+		object = hlist_entry(cookie->backing_objects.first,
+				     struct fscache_object, cookie_link);
+
+		if (test_bit(FSCACHE_IOERROR, &object->cache->flags))
+			goto out;
+
+		/* prevent the cache from being withdrawn */
+		if (fscache_operation_lock(object)) {
+			if (object->cache->ops->grab_object(object)) {
+				/* ask the cache to honour the operation */
+				ret = object->cache->ops->read_or_alloc_pages(
+					object,
+					mapping,
+					pages,
+					nr_pages,
+					end_io_func,
+					context,
+					gfp);
+
+				object->cache->ops->put_object(object);
+			}
+
+			fscache_operation_unlock(object);
+		}
+	}
+
+out:
+	up_read(&cookie->sem);
+	_leave(" = %d", ret);
+	return ret;
+
+} /* end __fscache_read_or_alloc_pages() */
+
+EXPORT_SYMBOL(__fscache_read_or_alloc_pages);
+
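For illustration, a minimal sketch of how a network filesystem's readpage() might drive the read path above, assuming the usual arrangement in which include/linux/fscache.h wraps __fscache_read_or_alloc_page() as fscache_read_or_alloc_page() and the completion handler follows the fscache_rw_complete_t shape of (page, context, error).  The my_*() names are hypothetical:

	#include <linux/fscache.h>
	#include <linux/pagemap.h>

	/* hypothetical completion handler; assumes the fscache_rw_complete_t
	 * callback receives the page, the caller's context and an error code */
	static void my_read_done(struct page *page, void *context, int error)
	{
		if (error)
			SetPageError(page);
		else
			SetPageUptodate(page);
		unlock_page(page);
	}

	/* hypothetical address_space readpage() implementation */
	static int my_readpage(struct file *file, struct page *page)
	{
		struct fscache_cookie *cookie = my_get_cookie(page->mapping->host);
		int ret;

		/* 0 means a read was dispatched; my_read_done() runs on completion */
		ret = fscache_read_or_alloc_page(cookie, page, my_read_done,
						 NULL, GFP_KERNEL);
		if (ret == 0)
			return 0;

		/* -ENODATA: a block was allocated but holds no data yet;
		 * -ENOBUFS/-ENOMEM/-EINTR: the cache could not be used;
		 * either way, fall back to fetching the page from the server */
		return my_read_from_server(file, page);
	}

On the fallback path the netfs would typically store the data it fetched from the server back into the cache afterwards, using the write-side calls defined later in this file.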
+/*****************************************************************************/ +/* + * allocate a block in the cache on which to store a page + * - we return: + * -ENOMEM - out of memory, nothing done + * -EINTR - interrupted + * -ENOBUFS - no backing object available in which to cache the block + * 0 - block allocated + */ +int __fscache_alloc_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + struct fscache_object *object; + int ret; + + _enter("%p,{%lu},", cookie, page->index); + + if (hlist_empty(&cookie->backing_objects)) { + _leave(" -ENOBUFS [no backing objects]"); + return -ENOBUFS; + } + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + /* prevent the file from being uncached whilst we access it */ + down_read(&cookie->sem); + + ret = -ENOBUFS; + if (!hlist_empty(&cookie->backing_objects)) { + /* get and pin the backing object */ + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto out; + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + if (object->cache->ops->grab_object(object)) { + /* ask the cache to honour the operation */ + ret = object->cache->ops->allocate_page(object, + page, + gfp); + + object->cache->ops->put_object(object); + } + + fscache_operation_unlock(object); + } + } + +out: + up_read(&cookie->sem); + _leave(" = %d", ret); + return ret; + +} /* end __fscache_alloc_page() */ + +EXPORT_SYMBOL(__fscache_alloc_page); + +/*****************************************************************************/ +/* + * request a page be stored in the cache + * - returns: + * -ENOMEM - out of memory, nothing done + * -EINTR - interrupted + * -ENOBUFS - no backing object available in which to cache the page + * 0 - dispatched a write - it'll call end_io_func() when finished + */ +int __fscache_write_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ + struct fscache_object *object; + int ret; + + _enter("%p,{%lu},", cookie, page->index); + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + /* prevent the file from been uncached whilst we deal with it */ + down_read(&cookie->sem); + + ret = -ENOBUFS; + if (!hlist_empty(&cookie->backing_objects)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto out; + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + /* ask the cache to honour the operation */ + ret = object->cache->ops->write_page(object, + page, + end_io_func, + context, + gfp); + fscache_operation_unlock(object); + } + } + +out: + up_read(&cookie->sem); + _leave(" = %d", ret); + return ret; + +} /* end __fscache_write_page() */ + +EXPORT_SYMBOL(__fscache_write_page); + +/*****************************************************************************/ +/* + * request several pages be stored in the cache + * - returns: + * -ENOMEM - out of memory, nothing done + * -EINTR - interrupted + * -ENOBUFS - no backing object available in which to cache the page + * 0 - dispatched a write - it'll call end_io_func() when finished + */ +int __fscache_write_pages(struct fscache_cookie *cookie, + struct pagevec *pagevec, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ + 
struct fscache_object *object; + int ret; + + _enter("%p,{%ld},", cookie, pagevec->nr); + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + /* prevent the file from been uncached whilst we deal with it */ + down_read(&cookie->sem); + + ret = -ENOBUFS; + if (!hlist_empty(&cookie->backing_objects)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) + goto out; + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + /* ask the cache to honour the operation */ + ret = object->cache->ops->write_pages(object, + pagevec, + end_io_func, + context, + gfp); + fscache_operation_unlock(object); + } + } + +out: + up_read(&cookie->sem); + _leave(" = %d", ret); + return ret; + +} /* end __fscache_write_pages() */ + +EXPORT_SYMBOL(__fscache_write_pages); + +/*****************************************************************************/ +/* + * remove a page from the cache + */ +void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page) +{ + struct fscache_object *object; + struct pagevec pagevec; + + _enter(",{%lu}", page->index); + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + if (hlist_empty(&cookie->backing_objects)) { + _leave(" [no backing]"); + return; + } + + pagevec_init(&pagevec, 0); + pagevec_add(&pagevec, page); + + /* ask the cache to honour the operation */ + down_read(&cookie->sem); + + if (!hlist_empty(&cookie->backing_objects)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + object->cache->ops->uncache_pages(object, &pagevec); + fscache_operation_unlock(object); + } + } + + up_read(&cookie->sem); + + _leave(""); + return; + +} /* end __fscache_uncache_page() */ + +EXPORT_SYMBOL(__fscache_uncache_page); + +/*****************************************************************************/ +/* + * remove a bunch of pages from the cache + */ +void __fscache_uncache_pages(struct fscache_cookie *cookie, + struct pagevec *pagevec) +{ + struct fscache_object *object; + + _enter(",{%ld}", pagevec->nr); + + BUG_ON(pagevec->nr <= 0); + BUG_ON(!pagevec->pages[0]); + + /* not supposed to use this for indexes */ + BUG_ON(cookie->def->type == FSCACHE_COOKIE_TYPE_INDEX); + + if (hlist_empty(&cookie->backing_objects)) { + _leave(" [no backing]"); + return; + } + + /* ask the cache to honour the operation */ + down_read(&cookie->sem); + + if (!hlist_empty(&cookie->backing_objects)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, cookie_link); + + /* prevent the cache from being withdrawn */ + if (fscache_operation_lock(object)) { + object->cache->ops->uncache_pages(object, pagevec); + fscache_operation_unlock(object); + } + } + + up_read(&cookie->sem); + + _leave(""); + return; + +} /* end __fscache_uncache_pages() */ + +EXPORT_SYMBOL(__fscache_uncache_pages); diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 0b572a0..2af6f22 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -4,9 +4,9 @@ # obj-$(CONFIG_NFS_FS) += nfs.o -nfs-y := dir.o file.o inode.o super.o nfs2xdr.o pagelist.o \ - proc.o read.o symlink.o unlink.o write.o \ - namespace.o +nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ + pagelist.o proc.o read.o symlink.o unlink.o \ + write.o 
namespace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o @@ -16,4 +16,5 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4x nfs4namespace.o nfs-$(CONFIG_NFS_DIRECTIO) += direct.o nfs-$(CONFIG_SYSCTL) += sysctl.o +nfs-$(CONFIG_NFS_FSCACHE) += fscache.o nfs-objs := $(nfs-y) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index fe0a6b8..a3ee113 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -19,6 +19,7 @@ #include #include "nfs4_fs.h" #include "callback.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -36,6 +37,21 @@ static struct svc_program nfs4_callback_ unsigned int nfs_callback_set_tcpport; unsigned short nfs_callback_tcpport; +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; + +static int param_set_port(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) + return -EINVAL; + *((int *)kp->arg) = num; + return 0; +} + +module_param_call(callback_tcpport, param_set_port, param_get_int, + &nfs_callback_set_tcpport, 0644); /* * This is the callback kernel thread. @@ -134,10 +150,8 @@ out_err: /* * Kill the server process if it is not already up. */ -int nfs_callback_down(void) +void nfs_callback_down(void) { - int ret = 0; - lock_kernel(); mutex_lock(&nfs_callback_mutex); nfs_callback_info.users--; @@ -149,20 +163,19 @@ int nfs_callback_down(void) } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0); mutex_unlock(&nfs_callback_mutex); unlock_kernel(); - return ret; } static int nfs_callback_authenticate(struct svc_rqst *rqstp) { - struct in_addr *addr = &rqstp->rq_addr.sin_addr; - struct nfs4_client *clp; + struct sockaddr_in *addr = &rqstp->rq_addr; + struct nfs_client *clp; /* Don't talk to strangers */ - clp = nfs4_find_client(addr); + clp = nfs_find_client(addr, 4); if (clp == NULL) return SVC_DROP; - dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr)); - nfs4_put_client(clp); + dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->sin_addr)); + nfs_put_client(clp); switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: if (rqstp->rq_proc != CB_NULL) diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index b252e7f..5676163 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -62,8 +62,13 @@ struct cb_recallargs { extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy); +#ifdef CONFIG_NFS_V4 extern int nfs_callback_up(void); -extern int nfs_callback_down(void); +extern void nfs_callback_down(void); +#else +#define nfs_callback_up() (0) +#define nfs_callback_down() do {} while(0) +#endif extern unsigned int nfs_callback_set_tcpport; extern unsigned short nfs_callback_tcpport; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 7719483..97cf8f7 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -10,19 +10,20 @@ #include #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) { - struct nfs4_client *clp; + struct nfs_client *clp; struct nfs_delegation *delegation; struct nfs_inode *nfsi; struct inode *inode; res->bitmap[0] = res->bitmap[1] = 0; 
res->status = htonl(NFS4ERR_BADHANDLE); - clp = nfs4_find_client(&args->addr->sin_addr); + clp = nfs_find_client(args->addr, 4); if (clp == NULL) goto out; inode = nfs_delegation_find_inode(clp, &args->fh); @@ -48,7 +49,7 @@ out_iput: up_read(&nfsi->rwsem); iput(inode); out_putclient: - nfs4_put_client(clp); + nfs_put_client(clp); out: dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status)); return res->status; @@ -56,12 +57,12 @@ out: unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy) { - struct nfs4_client *clp; + struct nfs_client *clp; struct inode *inode; unsigned res; res = htonl(NFS4ERR_BADHANDLE); - clp = nfs4_find_client(&args->addr->sin_addr); + clp = nfs_find_client(args->addr, 4); if (clp == NULL) goto out; inode = nfs_delegation_find_inode(clp, &args->fh); @@ -80,7 +81,7 @@ unsigned nfs4_callback_recall(struct cb_ } iput(inode); out_putclient: - nfs4_put_client(clp); + nfs_put_client(clp); out: dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res)); return res; diff --git a/fs/nfs/client.c b/fs/nfs/client.c new file mode 100644 index 0000000..bf701f1 --- /dev/null +++ b/fs/nfs/client.c @@ -0,0 +1,1439 @@ +/* client.c: NFS client sharing and management code + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "nfs4_fs.h" +#include "callback.h" +#include "delegation.h" +#include "iostat.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT + +static DEFINE_SPINLOCK(nfs_client_lock); +static LIST_HEAD(nfs_client_list); +static LIST_HEAD(nfs_volume_list); +static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); + +/* + * RPC cruft for NFS + */ +static struct rpc_version *nfs_version[5] = { + [2] = &nfs_version2, +#ifdef CONFIG_NFS_V3 + [3] = &nfs_version3, +#endif +#ifdef CONFIG_NFS_V4 + [4] = &nfs_version4, +#endif +}; + +struct rpc_program nfs_program = { + .name = "nfs", + .number = NFS_PROGRAM, + .nrvers = ARRAY_SIZE(nfs_version), + .version = nfs_version, + .stats = &nfs_rpcstat, + .pipe_dir_name = "/nfs", +}; + +struct rpc_stat nfs_rpcstat = { + .program = &nfs_program +}; + + +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static struct rpc_version * nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = ARRAY_SIZE(nfsacl_version), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; +#endif /* CONFIG_NFS_V3_ACL */ + +/* + * Allocate a shared client record + * + * Since these are allocated/deallocated very rarely, we don't + * bother putting them in a slab cache... + */ +static struct nfs_client *nfs_alloc_client(const char *hostname, + const struct sockaddr_in *addr, + int nfsversion) +{ + struct nfs_client *clp; + int error; + + if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) + goto error_0; + + error = rpciod_up(); + if (error < 0) { + dprintk("%s: couldn't start rpciod! 
Error = %d\n", + __FUNCTION__, error); + __set_bit(NFS_CS_RPCIOD, &clp->cl_svcs_state); + goto error_1; + } + + if (nfsversion == 4) { + if (nfs_callback_up() < 0) + goto error_2; + __set_bit(NFS_CS_CALLBACK, &clp->cl_svcs_state); + } + + atomic_set(&clp->cl_count, 1); + clp->cl_cons_state = NFS_CS_INITING; + + clp->cl_nfsversion = nfsversion; + memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); + + if (hostname) { + clp->cl_hostname = kstrdup(hostname, GFP_KERNEL); + if (!clp->cl_hostname) + goto error_3; + } + + INIT_LIST_HEAD(&clp->cl_superblocks); + clp->cl_rpcclient = ERR_PTR(-EINVAL); + +#ifdef CONFIG_NFS_V4 + init_rwsem(&clp->cl_sem); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_state_owners); + INIT_LIST_HEAD(&clp->cl_unused); + spin_lock_init(&clp->cl_lock); + INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); + rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); + clp->cl_boot_time = CURRENT_TIME; + clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; +#endif + + nfs_fscache_get_client_cookie(clp); + + return clp; + +error_3: + nfs_callback_down(); + __clear_bit(NFS_CS_CALLBACK, &clp->cl_svcs_state); +error_2: + rpciod_down(); + __clear_bit(NFS_CS_RPCIOD, &clp->cl_svcs_state); +error_1: + kfree(clp); +error_0: + return NULL; +} + +/* + * Destroy a shared client record + */ +static void nfs_free_client(struct nfs_client *clp) +{ + dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion); + +#ifdef CONFIG_NFS_V4 + if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_svcs_state)) { + while (!list_empty(&clp->cl_unused)) { + struct nfs4_state_owner *sp; + + sp = list_entry(clp->cl_unused.next, + struct nfs4_state_owner, + so_list); + list_del(&sp->so_list); + kfree(sp); + } + BUG_ON(!list_empty(&clp->cl_state_owners)); + nfs_idmap_delete(clp); + } +#endif + + nfs_fscache_release_client_cookie(clp); + + /* -EIO all pending I/O */ + if (!IS_ERR(clp->cl_rpcclient)) + rpc_shutdown_client(clp->cl_rpcclient); + + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_svcs_state)) + nfs_callback_down(); + + if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_svcs_state)) + rpciod_down(); + + kfree(clp->cl_hostname); + kfree(clp); + + dprintk("<-- nfs_free_client()\n"); +} + +/* + * Release a reference to a shared client record + */ +void nfs_put_client(struct nfs_client *clp) +{ + dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); + + if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { + list_del(&clp->cl_share_link); + spin_unlock(&nfs_client_lock); + + BUG_ON(!list_empty(&clp->cl_superblocks)); + + nfs_free_client(clp); + } +} + +/* + * Find a client by address + * - caller must hold nfs_client_lock + */ +static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion) +{ + struct nfs_client *clp; + + list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + /* Different NFS versions cannot share the same nfs_client */ + if (clp->cl_nfsversion != nfsversion) + continue; + + if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr, + sizeof(clp->cl_addr.sin_addr)) != 0) + continue; + + if (clp->cl_addr.sin_port == addr->sin_port) + goto found; + } + + return NULL; + +found: + atomic_inc(&clp->cl_count); + return clp; +} + +/* + * Find a client by IP address and protocol version + * - returns NULL if no such client + */ +struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion) +{ + struct nfs_client *clp; + + spin_lock(&nfs_client_lock); + clp = __nfs_find_client(addr, nfsversion); + 
spin_unlock(&nfs_client_lock); + + BUG_ON(clp->cl_cons_state == 0); + + return clp; +} + +/* + * Look up a client by IP address and protocol version + * - creates a new record if one doesn't yet exist + */ +static struct nfs_client *nfs_get_client(const char *hostname, + const struct sockaddr_in *addr, + int nfsversion) +{ + struct nfs_client *clp, *new = NULL; + int error; + + dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n", + hostname ?: "", NIPQUAD(addr->sin_addr), + addr->sin_port, nfsversion); + + /* see if the client already exists */ + do { + spin_lock(&nfs_client_lock); + + clp = __nfs_find_client(addr, nfsversion); + if (clp) + goto found_client; + if (new) + goto install_client; + + spin_unlock(&nfs_client_lock); + + new = nfs_alloc_client(hostname, addr, nfsversion); + } while (new); + + return ERR_PTR(-ENOMEM); + + /* install a new client and return with it unready */ +install_client: + clp = new; + list_add(&clp->cl_share_link, &nfs_client_list); + spin_unlock(&nfs_client_lock); + dprintk("--> nfs_get_client() = %p [new]\n", clp); + return clp; + + /* found an existing client + * - make sure it's ready before returning + */ +found_client: + spin_unlock(&nfs_client_lock); + + if (new) + nfs_free_client(new); + + if (clp->cl_cons_state == NFS_CS_INITING) { + DECLARE_WAITQUEUE(myself, current); + + add_wait_queue(&nfs_client_active_wq, &myself); + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current) || + clp->cl_cons_state > NFS_CS_READY) + break; + schedule(); + } + + remove_wait_queue(&nfs_client_active_wq, &myself); + + if (signal_pending(current)) { + nfs_put_client(clp); + return ERR_PTR(-ERESTARTSYS); + } + } + + if (clp->cl_cons_state < NFS_CS_READY) { + error = clp->cl_cons_state; + nfs_put_client(clp); + return ERR_PTR(error); + } + + BUG_ON(clp->cl_cons_state != NFS_CS_READY); + + dprintk("--> nfs_get_client() = %p [share]\n", clp); + return clp; +} + +/* + * Mark a server as ready or failed + */ +static void nfs_mark_client_ready(struct nfs_client *clp, int state) +{ + clp->cl_cons_state = state; + wake_up_all(&nfs_client_active_wq); +} + +/* + * Initialise the timeout values for a connection + */ +static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, + unsigned int timeo, unsigned int retrans) +{ + to->to_initval = timeo * HZ / 10; + to->to_retries = retrans; + if (!to->to_retries) + to->to_retries = 2; + + switch (proto) { + case IPPROTO_TCP: + if (!to->to_initval) + to->to_initval = 60 * HZ; + if (to->to_initval > NFS_MAX_TCP_TIMEOUT) + to->to_initval = NFS_MAX_TCP_TIMEOUT; + to->to_increment = to->to_initval; + to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); + to->to_exponential = 0; + break; + case IPPROTO_UDP: + default: + if (!to->to_initval) + to->to_initval = 11 * HZ / 10; + if (to->to_initval > NFS_MAX_UDP_TIMEOUT) + to->to_initval = NFS_MAX_UDP_TIMEOUT; + to->to_maxval = NFS_MAX_UDP_TIMEOUT; + to->to_exponential = 1; + break; + } +} + +/* + * Create an RPC client handle + */ +static int nfs_create_rpc_client(struct nfs_client *clp, int proto, + unsigned int timeo, + unsigned int retrans, + rpc_authflavor_t flavor) +{ + struct rpc_timeout timeparms; + struct rpc_xprt *xprt = NULL; + struct rpc_clnt *clnt = NULL; + + if (!IS_ERR(clp->cl_rpcclient)) + return 0; + nfs_init_timeout_values(&timeparms, proto, timeo, retrans); + + clp->retrans_timeo = timeparms.to_initval; + clp->retrans_count = timeparms.to_retries; + + /* create transport and client */ + xprt = xprt_create_proto(proto, 
&clp->cl_addr, &timeparms); + if (IS_ERR(xprt)) { + dprintk("%s: cannot create RPC transport. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); + return PTR_ERR(xprt); + } + + /* Bind to a reserved port! */ + xprt->resvport = 1; + /* Create the client RPC handle */ + clnt = rpc_create_client(xprt, clp->cl_hostname, &nfs_program, + clp->rpc_ops->version, RPC_AUTH_UNIX); + if (IS_ERR(clnt)) { + dprintk("%s: cannot create RPC client. Error = %ld\n", + __FUNCTION__, PTR_ERR(clnt)); + return PTR_ERR(clnt); + } + + clnt->cl_intr = 1; + clnt->cl_softrtry = 1; + clp->cl_rpcclient = clnt; + return 0; +} + +/* + * Version 2 or 3 client destruction + */ +static void nfs_destroy_server(struct nfs_server *server) +{ + if (!IS_ERR(server->client_acl)) + rpc_shutdown_client(server->client_acl); + + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_down(); /* release rpc.lockd */ +} + +/* + * Version 2 or 3 lockd setup + */ +static int nfs_start_lockd(struct nfs_server *server) +{ + int error = 0; + + if (server->nfs_client->cl_nfsversion > 3) + goto out; + if (server->flags & NFS_MOUNT_NONLM) + goto out; + error = lockd_up(); + if (error < 0) + server->flags |= NFS_MOUNT_NONLM; + else + server->destroy = nfs_destroy_server; +out: + return error; +} + +/* + * Initialise an NFSv3 ACL client connection + */ +#ifdef CONFIG_NFS_V3_ACL +static void nfs_init_server_aclclient(struct nfs_server *server) +{ + if (server->nfs_client->cl_nfsversion != 3) + goto out_noacl; + if (server->flags & NFS_MOUNT_NOACL) + goto out_noacl; + + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + if (IS_ERR(server->client_acl)) + goto out_noacl; + + /* No errors! Assume that Sun nfsacls are supported */ + server->caps |= NFS_CAP_ACLS; + return; + +out_noacl: + server->caps &= ~NFS_CAP_ACLS; +} +#else +static inline void nfs_init_server_aclclient(struct nfs_server *server) +{ + server->flags &= ~NFS_MOUNT_NOACL; + server->caps &= ~NFS_CAP_ACLS; +} +#endif + +/* + * Create a general RPC client + */ +static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour) +{ + struct nfs_client *clp = server->nfs_client; + + server->client = rpc_clone_client(clp->cl_rpcclient); + if (IS_ERR(server->client)) { + dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__); + return PTR_ERR(server->client); + } + + if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) { + struct rpc_auth *auth; + + auth = rpcauth_create(pseudoflavour, server->client); + if (IS_ERR(auth)) { + dprintk("%s: couldn't create credcache!\n", __FUNCTION__); + return PTR_ERR(auth); + } + } + server->client->cl_softrtry = 0; + if (server->flags & NFS_MOUNT_SOFT) + server->client->cl_softrtry = 1; + + server->client->cl_intr = 0; + if (server->flags & NFS4_MOUNT_INTR) + server->client->cl_intr = 1; + + return 0; +} + +/* + * Initialise an NFS2 or NFS3 client + */ +static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data) +{ + int proto = (data->flags & NFS_MOUNT_TCP) ? 
IPPROTO_TCP : IPPROTO_UDP; + int error; + + if (clp->cl_cons_state == NFS_CS_READY) { + /* the client is already initialised */ + dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp); + return 0; + } + + /* Check NFS protocol revision and initialize RPC op vector */ + clp->rpc_ops = &nfs_v2_clientops; +#ifdef CONFIG_NFS_V3 + if (clp->cl_nfsversion == 3) + clp->rpc_ops = &nfs_v3_clientops; +#endif + /* + * Create a client RPC handle for doing FSSTAT with UNIX auth only + * - RFC 2623, sec 2.3.2 + */ + error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans, + RPC_AUTH_UNIX); + if (error < 0) + goto error; + nfs_mark_client_ready(clp, NFS_CS_READY); + return 0; + +error: + nfs_mark_client_ready(clp, error); + dprintk("<-- nfs_init_client() = xerror %d\n", error); + return error; +} + +/* + * Create a version 2 or 3 client + */ +static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data) +{ + struct nfs_client *clp; + int error, nfsvers = 2; + + dprintk("--> nfs_init_server()\n"); + +#ifdef CONFIG_NFS_V3 + if (data->flags & NFS_MOUNT_VER3) + nfsvers = 3; +#endif + + /* Allocate or find a client reference we can use */ + clp = nfs_get_client(data->hostname, &data->addr, nfsvers); + if (IS_ERR(clp)) { + dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); + return PTR_ERR(clp); + } + + error = nfs_init_client(clp, data); + if (error < 0) + goto error; + + server->nfs_client = clp; + + /* Initialise the client representation from the mount data */ + server->flags = data->flags & NFS_MOUNT_FLAGMASK; + + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + + server->acregmin = data->acregmin * HZ; + server->acregmax = data->acregmax * HZ; + server->acdirmin = data->acdirmin * HZ; + server->acdirmax = data->acdirmax * HZ; + + /* Start lockd here, before we might error out */ + error = nfs_start_lockd(server); + if (error < 0) + goto error; + + error = nfs_init_server_rpcclient(server, data->pseudoflavor); + if (error < 0) + goto error; + + server->namelen = data->namlen; + /* Create a client RPC handle for the NFSv3 ACL management interface */ + nfs_init_server_aclclient(server); + if (clp->cl_nfsversion == 3) { + if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) + server->namelen = NFS3_MAXNAMLEN; + server->caps |= NFS_CAP_READDIRPLUS; + } else { + if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) + server->namelen = NFS2_MAXNAMLEN; + } + + dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp); + return 0; + +error: + server->nfs_client = NULL; + nfs_put_client(clp); + dprintk("<-- nfs_init_server() = xerror %d\n", error); + return error; +} + +/* + * Load up the server record from information gained in an fsinfo record + */ +static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo) +{ + unsigned long max_rpc_payload; + + /* Work out a lot of parameters */ + if (server->rsize == 0) + server->rsize = nfs_block_size(fsinfo->rtpref, NULL); + if (server->wsize == 0) + server->wsize = nfs_block_size(fsinfo->wtpref, NULL); + + if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax) + server->rsize = nfs_block_size(fsinfo->rtmax, NULL); + if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax) + server->wsize = nfs_block_size(fsinfo->wtmax, NULL); + + max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); + if (server->rsize > max_rpc_payload) + server->rsize = max_rpc_payload; + if 
(server->rsize > NFS_MAX_FILE_IO_SIZE) + server->rsize = NFS_MAX_FILE_IO_SIZE; + server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; + + if (server->wsize > max_rpc_payload) + server->wsize = max_rpc_payload; + if (server->wsize > NFS_MAX_FILE_IO_SIZE) + server->wsize = NFS_MAX_FILE_IO_SIZE; + server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); + + server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); + if (server->dtsize > PAGE_CACHE_SIZE) + server->dtsize = PAGE_CACHE_SIZE; + if (server->dtsize > server->rsize) + server->dtsize = server->rsize; + + if (server->flags & NFS_MOUNT_NOAC) { + server->acregmin = server->acregmax = 0; + server->acdirmin = server->acdirmax = 0; + } + + server->maxfilesize = fsinfo->maxfilesize; + + /* We're airborne Set socket buffersize */ + rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); +} + +/* + * Probe filesystem information, including the FSID on v2/v3 + */ +static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) +{ + struct nfs_fsinfo fsinfo; + struct nfs_client *clp = server->nfs_client; + int error; + + dprintk("--> nfs_probe_fsinfo()\n"); + + if (clp->rpc_ops->set_capabilities != NULL) { + error = clp->rpc_ops->set_capabilities(server, mntfh); + if (error < 0) + goto out_error; + } + + fsinfo.fattr = fattr; + nfs_fattr_init(fattr); + error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); + if (error < 0) + goto out_error; + + nfs_server_set_fsinfo(server, &fsinfo); + + /* Get some general file system info */ + if (server->namelen == 0) { + struct nfs_pathconf pathinfo; + + pathinfo.fattr = fattr; + nfs_fattr_init(fattr); + + if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0) + server->namelen = pathinfo.max_namelen; + } + + dprintk("<-- nfs_probe_fsinfo() = 0\n"); + return 0; + +out_error: + dprintk("nfs_probe_fsinfo: error = %d\n", -error); + return error; +} + +/* + * Copy useful information when duplicating a server record + */ +static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) +{ + target->flags = source->flags; + target->acregmin = source->acregmin; + target->acregmax = source->acregmax; + target->acdirmin = source->acdirmin; + target->acdirmax = source->acdirmax; + target->caps = source->caps; +} + +/* + * Allocate and initialise a server record + */ +static struct nfs_server *nfs_alloc_server(void) +{ + struct nfs_server *server; + + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (!server) + return NULL; + + server->client = server->client_acl = ERR_PTR(-EINVAL); + + /* Zero out the NFS state stuff */ + INIT_LIST_HEAD(&server->client_link); + INIT_LIST_HEAD(&server->master_link); + + server->io_stats = nfs_alloc_iostats(); + if (!server->io_stats) { + kfree(server); + return NULL; + } + + return server; +} + +/* + * Free up a server record + */ +void nfs_free_server(struct nfs_server *server) +{ + dprintk("--> nfs_free_server()\n"); + + spin_lock(&nfs_client_lock); + list_del(&server->client_link); + list_del(&server->master_link); + spin_unlock(&nfs_client_lock); + + if (server->destroy != NULL) + server->destroy(server); + if (!IS_ERR(server->client)) + rpc_shutdown_client(server->client); + + nfs_put_client(server->nfs_client); + + nfs_free_iostats(server->io_stats); + kfree(server); + nfs_release_automount_timer(); + dprintk("<-- 
nfs_free_server()\n"); +} + +/* + * Create a version 2 or 3 volume record + * - keyed on server and FSID + */ +struct nfs_server *nfs_create_server(const struct nfs_mount_data *data, + struct nfs_fh *mntfh) +{ + struct nfs_server *server; + struct nfs_fattr fattr; + int error; + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* Get a client representation */ + error = nfs_init_server(server, data); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* Probe the root fh to retrieve its FSID */ + error = nfs_probe_fsinfo(server, mntfh, &fattr); + if (error < 0) + goto error; + if (!(fattr.valid & NFS_ATTR_FATTR)) { + error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); + if (error < 0) { + dprintk("nfs_create_server: getattr error = %d\n", -error); + goto error; + } + } + memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid)); + + dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor); + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + return server; + +error: + nfs_free_server(server); + return ERR_PTR(error); +} + +#ifdef CONFIG_NFS_V4 +/* + * Initialise an NFS4 client record + */ +static int nfs4_init_client(struct nfs_client *clp, + int proto, int timeo, int retrans, + rpc_authflavor_t authflavour) +{ + int error; + + if (clp->cl_cons_state == NFS_CS_READY) { + /* the client is initialised already */ + dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); + return 0; + } + + /* Check NFS protocol revision and initialize RPC op vector */ + clp->rpc_ops = &nfs_v4_clientops; + + error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour); + if (error < 0) + goto error; + + error = nfs_idmap_new(clp); + if (error < 0) { + dprintk("%s: failed to create idmapper. 
Error = %d\n", + __FUNCTION__, error); + goto error; + } + __set_bit(NFS_CS_IDMAP, &clp->cl_svcs_state); + + nfs_mark_client_ready(clp, NFS_CS_READY); + return 0; + +error: + nfs_mark_client_ready(clp, error); + dprintk("<-- nfs4_init_client() = xerror %d\n", error); + return error; +} + +/* + * Set up an NFS4 client + */ +static int nfs4_set_client(struct nfs_server *server, + const char *hostname, const struct sockaddr_in *addr, + rpc_authflavor_t authflavour, + int proto, int timeo, int retrans) +{ + struct nfs_client *clp; + int error; + + dprintk("--> nfs4_set_client()\n"); + + /* Allocate or find a client reference we can use */ + clp = nfs_get_client(hostname, addr, 4); + if (IS_ERR(clp)) { + error = PTR_ERR(clp); + goto error; + } + error = nfs4_init_client(clp, proto, timeo, retrans, authflavour); + if (error < 0) + goto error_put; + + server->nfs_client = clp; + dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); + return 0; + +error_put: + nfs_put_client(clp); +error: + dprintk("<-- nfs4_set_client() = xerror %d\n", error); + return error; +} + +/* + * Create a version 4 volume record + */ +static int nfs4_init_server(struct nfs_server *server, + const struct nfs4_mount_data *data, rpc_authflavor_t authflavour) +{ + int error; + + dprintk("--> nfs4_init_server()\n"); + + /* Initialise the client representation from the mount data */ + server->flags = data->flags & NFS_MOUNT_FLAGMASK; + server->caps |= NFS_CAP_ATOMIC_OPEN; + + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + + server->acregmin = data->acregmin * HZ; + server->acregmax = data->acregmax * HZ; + server->acdirmin = data->acdirmin * HZ; + server->acdirmax = data->acdirmax * HZ; + + error = nfs_init_server_rpcclient(server, authflavour); + + /* Done */ + dprintk("<-- nfs4_init_server() = %d\n", error); + return error; +} + +/* + * Create a version 4 volume record + * - keyed on server and FSID + */ +struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, + const char *hostname, + const struct sockaddr_in *addr, + const char *mntpath, + const char *ip_addr, + rpc_authflavor_t authflavour, + struct nfs_fh *mntfh) +{ + struct nfs_fattr fattr; + struct nfs_server *server; + int error; + + dprintk("--> nfs4_create_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* Get a client record */ + error = nfs4_set_client(server, hostname, addr, authflavour, + data->proto, data->timeo, data->retrans); + if (error < 0) + goto error; + + /* set up the general RPC client */ + error = nfs4_init_server(server, data, authflavour); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* Probe the root fh to retrieve its FSID */ + error = nfs4_path_walk(server, mntfh, mntpath); + if (error < 0) + goto error; + + dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor); + dprintk("Mount FH: %d\n", mntfh->size); + + error = nfs_probe_fsinfo(server, mntfh, &fattr); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + 
dprintk("<-- nfs4_create_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_server() = error %d\n", error); + return ERR_PTR(error); +} + +/* + * Create an NFS4 referral server record + */ +struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, + struct nfs_fh *fh) +{ + struct nfs_client *parent_client; + struct nfs_server *server, *parent_server; + struct nfs_fattr fattr; + int error; + + dprintk("--> nfs4_create_referral_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + parent_server = NFS_SB(data->sb); + parent_client = parent_server->nfs_client; + + /* Get a client representation. + * Note: NFSv4 always uses TCP, */ + error = nfs4_set_client(server, data->hostname, data->addr, + data->authflavor, + parent_server->client->cl_xprt->prot, + parent_client->retrans_timeo, + parent_client->retrans_count); + + /* Initialise the client representation from the parent server */ + nfs_server_copy_userdata(server, parent_server); + server->caps |= NFS_CAP_ATOMIC_OPEN; + + error = nfs_init_server_rpcclient(server, data->authflavor); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* probe the filesystem info for this server filesystem */ + error = nfs_probe_fsinfo(server, fh, &fattr); + if (error < 0) + goto error; + + dprintk("Referral FSID: %llx:%llx\n", + server->fsid.major, server->fsid.minor); + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + + dprintk("<-- nfs_create_referral_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_referral_server() = error %d\n", error); + return ERR_PTR(error); +} + +#endif /* CONFIG_NFS_V4 */ + +/* + * Clone an NFS2, NFS3 or NFS4 server record + */ +struct nfs_server *nfs_clone_server(struct nfs_server *source, + struct nfs_fh *fh, + struct nfs_fattr *fattr) +{ + struct nfs_server *server; + struct nfs_fattr fattr_fsinfo; + int error; + + dprintk("--> nfs_clone_server(,%llx:%llx,)\n", + fattr->fsid.major, fattr->fsid.minor); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* Copy data from the source */ + server->nfs_client = source->nfs_client; + atomic_inc(&server->nfs_client->cl_count); + nfs_server_copy_userdata(server, source); + + server->fsid = fattr->fsid; + + error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor); + if (error < 0) + goto out_free_server; + if (!IS_ERR(source->client_acl)) + nfs_init_server_aclclient(server); + + /* probe the filesystem info for this server filesystem */ + error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo); + if (error < 0) + goto out_free_server; + + dprintk("Cloned FSID: %llx:%llx\n", + server->fsid.major, server->fsid.minor); + + error = nfs_start_lockd(server); + if (error < 0) + goto out_free_server; + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + + dprintk("<-- nfs_clone_server() = %p\n", server); + return server; + +out_free_server: + nfs_free_server(server); + dprintk("<-- nfs_clone_server() = error 
%d\n", error); + return ERR_PTR(error); +} + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry *proc_fs_nfs; + +static int nfs_server_list_open(struct inode *inode, struct file *file); +static void *nfs_server_list_start(struct seq_file *p, loff_t *pos); +static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos); +static void nfs_server_list_stop(struct seq_file *p, void *v); +static int nfs_server_list_show(struct seq_file *m, void *v); + +static struct seq_operations nfs_server_list_ops = { + .start = nfs_server_list_start, + .next = nfs_server_list_next, + .stop = nfs_server_list_stop, + .show = nfs_server_list_show, +}; + +static struct file_operations nfs_server_list_fops = { + .open = nfs_server_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int nfs_volume_list_open(struct inode *inode, struct file *file); +static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos); +static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos); +static void nfs_volume_list_stop(struct seq_file *p, void *v); +static int nfs_volume_list_show(struct seq_file *m, void *v); + +static struct seq_operations nfs_volume_list_ops = { + .start = nfs_volume_list_start, + .next = nfs_volume_list_next, + .stop = nfs_volume_list_stop, + .show = nfs_volume_list_show, +}; + +static struct file_operations nfs_volume_list_fops = { + .open = nfs_volume_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* + * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which + * we're dealing + */ +static int nfs_server_list_open(struct inode *inode, struct file *file) +{ + struct seq_file *m; + int ret; + + ret = seq_open(file, &nfs_server_list_ops); + if (ret < 0) + return ret; + + m = file->private_data; + m->private = PDE(inode)->data; + + return 0; +} + +/* + * set up the iterator to start reading from the server list and return the first item + */ +static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) +{ + struct list_head *_p; + loff_t pos = *_pos; + + /* lock the list against modification */ + spin_lock(&nfs_client_lock); + + /* allow for the header line */ + if (!pos) + return SEQ_START_TOKEN; + pos--; + + /* find the n'th element in the list */ + list_for_each(_p, &nfs_client_list) + if (!pos--) + break; + + return _p != &nfs_client_list ? _p : NULL; +} + +/* + * move to next server + */ +static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) +{ + struct list_head *_p; + + (*pos)++; + + _p = v; + _p = (v == SEQ_START_TOKEN) ? nfs_client_list.next : _p->next; + + return _p != &nfs_client_list ? 
_p : NULL; +} + +/* + * clean up after reading from the transports list + */ +static void nfs_server_list_stop(struct seq_file *p, void *v) +{ + spin_unlock(&nfs_client_lock); +} + +/* + * display a header line followed by a load of call lines + */ +static int nfs_server_list_show(struct seq_file *m, void *v) +{ + struct nfs_client *clp; + + /* display header on line 1 */ + if (v == SEQ_START_TOKEN) { + seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); + return 0; + } + + /* display one transport per line on subsequent lines */ + clp = list_entry(v, struct nfs_client, cl_share_link); + + seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n", + clp->cl_nfsversion, + NIPQUAD(clp->cl_addr.sin_addr), + ntohs(clp->cl_addr.sin_port), + atomic_read(&clp->cl_count), + clp->cl_hostname); + + return 0; +} + +/* + * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes + */ +static int nfs_volume_list_open(struct inode *inode, struct file *file) +{ + struct seq_file *m; + int ret; + + ret = seq_open(file, &nfs_volume_list_ops); + if (ret < 0) + return ret; + + m = file->private_data; + m->private = PDE(inode)->data; + + return 0; +} + +/* + * set up the iterator to start reading from the volume list and return the first item + */ +static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) +{ + struct list_head *_p; + loff_t pos = *_pos; + + /* lock the list against modification */ + spin_lock(&nfs_client_lock); + + /* allow for the header line */ + if (!pos) + return SEQ_START_TOKEN; + pos--; + + /* find the n'th element in the list */ + list_for_each(_p, &nfs_volume_list) + if (!pos--) + break; + + return _p != &nfs_volume_list ? _p : NULL; +} + +/* + * move to next volume + */ +static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) +{ + struct list_head *_p; + + (*pos)++; + + _p = v; + _p = (v == SEQ_START_TOKEN) ? nfs_volume_list.next : _p->next; + + return _p != &nfs_volume_list ? 
_p : NULL; +} + +/* + * clean up after reading from the transports list + */ +static void nfs_volume_list_stop(struct seq_file *p, void *v) +{ + spin_unlock(&nfs_client_lock); +} + +/* + * display a header line followed by a load of call lines + */ +static int nfs_volume_list_show(struct seq_file *m, void *v) +{ + struct nfs_server *server; + struct nfs_client *clp; + char dev[8], fsid[17]; + + /* display header on line 1 */ + if (v == SEQ_START_TOKEN) { + seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); + return 0; + } + /* display one transport per line on subsequent lines */ + server = list_entry(v, struct nfs_server, master_link); + clp = server->nfs_client; + + snprintf(dev, 8, "%u:%u", + MAJOR(server->s_dev), MINOR(server->s_dev)); + + snprintf(fsid, 17, "%llx:%llx", + server->fsid.major, server->fsid.minor); + + seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s %s\n", + clp->cl_nfsversion, + NIPQUAD(clp->cl_addr.sin_addr), + ntohs(clp->cl_addr.sin_port), + dev, + fsid, + nfs_server_fscache_state(server)); + + return 0; +} + +/* + * initialise the /proc/fs/nfsfs/ directory + */ +int __init nfs_fs_proc_init(void) +{ + struct proc_dir_entry *p; + + proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs); + if (!proc_fs_nfs) + goto error_0; + + proc_fs_nfs->owner = THIS_MODULE; + + /* a file of servers with which we're dealing */ + p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs); + if (!p) + goto error_1; + + p->proc_fops = &nfs_server_list_fops; + p->owner = THIS_MODULE; + + /* a file of volumes that we have mounted */ + p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs); + if (!p) + goto error_2; + + p->proc_fops = &nfs_volume_list_fops; + p->owner = THIS_MODULE; + return 0; + +error_2: + remove_proc_entry("servers", proc_fs_nfs); +error_1: + remove_proc_entry("nfsfs", proc_root_fs); +error_0: + return -ENOMEM; +} + +/* + * clean up the /proc/fs/nfsfs/ directory + */ +void nfs_fs_proc_exit(void) +{ + remove_proc_entry("volumes", proc_fs_nfs); + remove_proc_entry("servers", proc_fs_nfs); + remove_proc_entry("nfsfs", proc_root_fs); +} + +#endif /* CONFIG_PROC_FS */ diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 9540a31..5713367 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -18,6 +18,7 @@ #include #include "nfs4_fs.h" #include "delegation.h" +#include "internal.h" static struct nfs_delegation *nfs_alloc_delegation(void) { @@ -52,7 +53,7 @@ static int nfs_delegation_claim_locks(st case -NFS4ERR_EXPIRED: /* kill_proc(fl->fl_pid, SIGLOST, 1); */ case -NFS4ERR_STALE_CLIENTID: - nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs4_state); + nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client); goto out_err; } } @@ -114,7 +115,7 @@ void nfs_inode_reclaim_delegation(struct */ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) { - struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int status = 0; @@ -145,7 +146,7 @@ int nfs_inode_set_delegation(struct inod sizeof(delegation->stateid)) != 0 || delegation->type != nfsi->delegation->type) { printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n", - __FUNCTION__, NIPQUAD(clp->cl_addr)); + __FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr)); status = -EIO; } } @@ -176,7 +177,7 @@ static void nfs_msync_inode(struct inode */ int __nfs_inode_return_delegation(struct inode *inode) { - struct nfs4_client *clp = 
NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int res = 0; @@ -208,7 +209,7 @@ int __nfs_inode_return_delegation(struct */ void nfs_return_all_delegations(struct super_block *sb) { - struct nfs4_client *clp = NFS_SB(sb)->nfs4_state; + struct nfs_client *clp = NFS_SB(sb)->nfs_client; struct nfs_delegation *delegation; struct inode *inode; @@ -232,7 +233,7 @@ restart: int nfs_do_expire_all_delegations(void *ptr) { - struct nfs4_client *clp = ptr; + struct nfs_client *clp = ptr; struct nfs_delegation *delegation; struct inode *inode; @@ -254,11 +255,11 @@ restart: } out: spin_unlock(&clp->cl_lock); - nfs4_put_client(clp); + nfs_put_client(clp); module_put_and_exit(0); } -void nfs_expire_all_delegations(struct nfs4_client *clp) +void nfs_expire_all_delegations(struct nfs_client *clp) { struct task_struct *task; @@ -266,17 +267,17 @@ void nfs_expire_all_delegations(struct n atomic_inc(&clp->cl_count); task = kthread_run(nfs_do_expire_all_delegations, clp, "%u.%u.%u.%u-delegreturn", - NIPQUAD(clp->cl_addr)); + NIPQUAD(clp->cl_addr.sin_addr)); if (!IS_ERR(task)) return; - nfs4_put_client(clp); + nfs_put_client(clp); module_put(THIS_MODULE); } /* * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. */ -void nfs_handle_cb_pathdown(struct nfs4_client *clp) +void nfs_handle_cb_pathdown(struct nfs_client *clp) { struct nfs_delegation *delegation; struct inode *inode; @@ -299,7 +300,7 @@ restart: struct recall_threadargs { struct inode *inode; - struct nfs4_client *clp; + struct nfs_client *clp; const nfs4_stateid *stateid; struct completion started; @@ -310,7 +311,7 @@ static int recall_thread(void *data) { struct recall_threadargs *args = (struct recall_threadargs *)data; struct inode *inode = igrab(args->inode); - struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; @@ -371,7 +372,7 @@ out_module_put: /* * Retrieve the inode associated with a delegation */ -struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle) +struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle) { struct nfs_delegation *delegation; struct inode *res = NULL; @@ -389,7 +390,7 @@ struct inode *nfs_delegation_find_inode( /* * Mark all delegations as needing to be reclaimed */ -void nfs_delegation_mark_reclaim(struct nfs4_client *clp) +void nfs_delegation_mark_reclaim(struct nfs_client *clp) { struct nfs_delegation *delegation; spin_lock(&clp->cl_lock); @@ -401,7 +402,7 @@ void nfs_delegation_mark_reclaim(struct /* * Reap all unclaimed delegations after reboot recovery is done */ -void nfs_delegation_reap_unclaimed(struct nfs4_client *clp) +void nfs_delegation_reap_unclaimed(struct nfs_client *clp) { struct nfs_delegation *delegation, *n; LIST_HEAD(head); @@ -423,7 +424,7 @@ void nfs_delegation_reap_unclaimed(struc int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) { - struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int res = 0; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 3858694..2cfd4b2 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -29,13 +29,13 @@ void nfs_inode_reclaim_delegation(struct int 
__nfs_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); -struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); +struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); void nfs_return_all_delegations(struct super_block *sb); -void nfs_expire_all_delegations(struct nfs4_client *clp); -void nfs_handle_cb_pathdown(struct nfs4_client *clp); +void nfs_expire_all_delegations(struct nfs_client *clp); +void nfs_handle_cb_pathdown(struct nfs_client *clp); -void nfs_delegation_mark_reclaim(struct nfs4_client *clp); -void nfs_delegation_reap_unclaimed(struct nfs4_client *clp); +void nfs_delegation_mark_reclaim(struct nfs_client *clp); +void nfs_delegation_reap_unclaimed(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e7ffb4d..7bed6c0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -31,6 +31,7 @@ #include #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -870,14 +871,14 @@ int nfs_is_exclusive_create(struct inode return (nd->intent.open.flags & O_EXCL) != 0; } -static inline int nfs_reval_fsid(struct inode *dir, - struct nfs_fh *fh, struct nfs_fattr *fattr) +static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir, + struct nfs_fh *fh, struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) /* Revalidate fsid on root dir */ - return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode); + return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode); return 0; } @@ -913,7 +914,7 @@ static struct dentry *nfs_lookup(struct res = ERR_PTR(error); goto out_unlock; } - error = nfs_reval_fsid(dir, &fhandle, &fattr); + error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr); if (error < 0) { res = ERR_PTR(error); goto out_unlock; @@ -922,8 +923,9 @@ static struct dentry *nfs_lookup(struct res = (struct dentry *)inode; if (IS_ERR(res)) goto out_unlock; + no_entry: - res = d_add_unique(dentry, inode); + res = d_materialise_unique(dentry, inode); if (res != NULL) dentry = res; nfs_renew_times(dentry); @@ -1117,11 +1119,13 @@ static struct dentry *nfs_readdir_lookup dput(dentry); return NULL; } - alias = d_add_unique(dentry, inode); + + alias = d_materialise_unique(dentry, inode); if (alias != NULL) { dput(dentry); dentry = alias; } + nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); return dentry; @@ -1147,7 +1151,7 @@ int nfs_instantiate(struct dentry *dentr } if (!(fattr->valid & NFS_ATTR_FATTR)) { struct nfs_server *server = NFS_SB(dentry->d_sb); - error = server->rpc_ops->getattr(server, fhandle, fattr); + error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); if (error < 0) goto out_err; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index cc2b874..855bb97 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -27,12 +27,14 @@ #include #include #include #include +#include #include #include #include "delegation.h" #include "iostat.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -111,7 +113,7 @@ nfs_file_open(struct inode *inode, struc nfs_inc_stats(inode, NFSIOS_VFSOPEN); lock_kernel(); - res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp); + res = NFS_PROTO(inode)->file_open(inode, filp); 
unlock_kernel(); return res; } @@ -249,6 +251,10 @@ nfs_file_mmap(struct file * file, struct status = nfs_revalidate_mapping(inode, file->f_mapping); if (!status) status = generic_file_mmap(file, vma); + + if (status == 0) + nfs_fscache_install_vm_ops(inode, vma); + return status; } @@ -308,13 +314,35 @@ static void nfs_invalidate_page(struct p /* Cancel any unstarted writes on this page */ if (offset == 0) nfs_sync_inode_wait(inode, page->index, 1, FLUSH_INVALIDATE); + + nfs_fscache_invalidate_page(page, inode, offset); + + /* we can do this here as the bits are only set with the page lock + * held, and our caller is holding that */ + if (!page->private) + ClearPagePrivate(page); } static int nfs_release_page(struct page *page, gfp_t gfp) { - return !nfs_wb_page(page->mapping->host, page); + int error = nfs_wb_page(page->mapping->host, page); + + if (error == 0) { + nfs_fscache_release_page(page); + + /* may have been set due to either caching or writing */ + ClearPagePrivate(page); + } + + /* releasepage() returns true/false */ + return (error == 0) ? 1 : 0; } +/* + * Since we use page->private for our own nefarious purposes when using + * fscache, we have to override extra address space ops to prevent fs/buffer.c + * from getting confused, even though we may not have asked its opinion + */ const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, @@ -328,6 +356,9 @@ const struct address_space_operations nf #ifdef CONFIG_NFS_DIRECTIO .direct_IO = nfs_direct_IO, #endif +#ifdef CONFIG_NFS_FSCACHE + .sync_page = block_sync_page, +#endif }; /* diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c new file mode 100644 index 0000000..5400e6a --- /dev/null +++ b/fs/nfs/fscache.c @@ -0,0 +1,349 @@ +/* fscache.c: NFS filesystem cache interface + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +/* + * Sysctl variables + */ +atomic_t nfs_fscache_to_pages; +atomic_t nfs_fscache_from_pages; +atomic_t nfs_fscache_uncache_page; +int nfs_fscache_from_error; +int nfs_fscache_to_error; + +#define NFSDBG_FACILITY NFSDBG_FSCACHE + +/* the auxiliary data in the cache (used for coherency management) */ +struct nfs_fh_auxdata { + struct timespec i_mtime; + struct timespec i_ctime; + loff_t i_size; +}; + +static struct fscache_netfs_operations nfs_cache_ops = { +}; + +struct fscache_netfs nfs_cache_netfs = { + .name = "nfs", + .version = 0, + .ops = &nfs_cache_ops, +}; + +static const uint8_t nfs_cache_ipv6_wrapper_for_ipv4[12] = { + [0 ... 9] = 0x00, + [10 ... 
11] = 0xff +}; + +struct nfs_server_key { + uint16_t nfsversion; + uint16_t port; + union { + struct { + uint8_t ipv6wrapper[12]; + struct in_addr addr; + } ipv4_addr; + struct in6_addr ipv6_addr; + }; +}; + +static uint16_t nfs_server_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct nfs_client *clp = cookie_netfs_data; + struct nfs_server_key *key = buffer; + uint16_t len = 0; + + key->nfsversion = clp->cl_nfsversion; + + switch (clp->cl_addr.sin_family) { + case AF_INET: + key->port = clp->cl_addr.sin_port; + + memcpy(&key->ipv4_addr.ipv6wrapper, + &nfs_cache_ipv6_wrapper_for_ipv4, + sizeof(key->ipv4_addr.ipv6wrapper)); + memcpy(&key->ipv4_addr.addr, + &clp->cl_addr.sin_addr, + sizeof(key->ipv4_addr.addr)); + len = sizeof(struct nfs_server_key); + break; + + case AF_INET6: + key->port = clp->cl_addr.sin_port; + + memcpy(&key->ipv6_addr, + &clp->cl_addr.sin_addr, + sizeof(key->ipv6_addr)); + len = sizeof(struct nfs_server_key); + break; + + default: + len = 0; + printk(KERN_WARNING "NFS: Unknown network family '%d'\n", + clp->cl_addr.sin_family); + break; + } + + return len; +} + +/* + * the root index for the filesystem is defined by nfsd IP address and ports + */ +struct fscache_cookie_def nfs_cache_server_index_def = { + .name = "NFS.servers", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = nfs_server_get_key, +}; + +static uint16_t nfs_fh_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct nfs_inode *nfsi = cookie_netfs_data; + uint16_t nsize; + + /* set the file handle */ + nsize = nfsi->fh.size; + memcpy(buffer, nfsi->fh.data, nsize); + return nsize; +} + +/* + * indication of pages that now have cache metadata retained + * - this function should mark the specified pages as now being cached + */ +static void nfs_fh_mark_pages_cached(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec) +{ + struct nfs_inode *nfsi = cookie_netfs_data; + unsigned long loop; + + dprintk("NFS: nfs_fh_mark_pages_cached: nfs_inode 0x%p pages %ld\n", + nfsi, cached_pvec->nr); + + for (loop = 0; loop < cached_pvec->nr; loop++) + SetPageNfsCached(cached_pvec->pages[loop]); +} + +/* + * get an extra reference on a read context + * - this function can be absent if the completion function doesn't + * require a context + */ +static void nfs_fh_get_context(void *cookie_netfs_data, void *context) +{ + get_nfs_open_context(context); +} + +/* + * release an extra reference on a read context + * - this function can be absent if the completion function doesn't + * require a context + */ +static void nfs_fh_put_context(void *cookie_netfs_data, void *context) +{ + if (context) + put_nfs_open_context(context); +} + +/* + * indication the cookie is no longer uncached + * - this function is called when the backing store currently caching a cookie + * is removed + * - the netfs should use this to clean up any markers indicating cached pages + * - this is mandatory for any object that may have data + */ +static void nfs_fh_now_uncached(void *cookie_netfs_data) +{ + struct nfs_inode *nfsi = cookie_netfs_data; + struct pagevec pvec; + pgoff_t first; + int loop, nr_pages; + + pagevec_init(&pvec, 0); + first = 0; + + dprintk("NFS: nfs_fh_now_uncached: nfs_inode 0x%p\n", nfsi); + + for (;;) { + /* grab a bunch of pages to clean */ + nr_pages = pagevec_lookup(&pvec, + nfsi->vfs_inode.i_mapping, + first, + PAGEVEC_SIZE - pagevec_count(&pvec)); + if (!nr_pages) + break; + + for (loop = 0; loop < nr_pages; loop++) + 
ClearPageNfsCached(pvec.pages[loop]); + + first = pvec.pages[nr_pages - 1]->index + 1; + + pvec.nr = nr_pages; + pagevec_release(&pvec); + cond_resched(); + } +} + +/*****************************************************************************/ +/* + * get certain file attributes from the netfs data + * - this function can be absent for an index + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ +static void nfs_fh_get_attr(const void *cookie_netfs_data, uint64_t *size) +{ + const struct nfs_inode *nfsi = cookie_netfs_data; + + *size = nfsi->vfs_inode.i_size; +} + +/*****************************************************************************/ +/* + * get the auxilliary data from netfs data + * - this function can be absent if the index carries no state data + * - should store the auxilliary data in the buffer + * - should return the amount of amount stored + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ +static uint16_t nfs_fh_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + struct nfs_fh_auxdata auxdata; + const struct nfs_inode *nfsi = cookie_netfs_data; + + auxdata.i_size = nfsi->vfs_inode.i_size; + auxdata.i_mtime = nfsi->vfs_inode.i_mtime; + auxdata.i_ctime = nfsi->vfs_inode.i_ctime; + + if (bufmax > sizeof(auxdata)) + bufmax = sizeof(auxdata); + + memcpy(buffer, &auxdata, bufmax); + return bufmax; +} + +/*****************************************************************************/ +/* + * consult the netfs about the state of an object + * - this function can be absent if the index carries no state data + * - the netfs data from the cookie being used as the target is + * presented, as is the auxilliary data + */ +static fscache_checkaux_t nfs_fh_check_aux(void *cookie_netfs_data, + const void *data, uint16_t datalen) +{ + struct nfs_fh_auxdata auxdata; + struct nfs_inode *nfsi = cookie_netfs_data; + + if (datalen > sizeof(auxdata)) + return FSCACHE_CHECKAUX_OBSOLETE; + + auxdata.i_size = nfsi->vfs_inode.i_size; + auxdata.i_mtime = nfsi->vfs_inode.i_mtime; + auxdata.i_ctime = nfsi->vfs_inode.i_ctime; + + if (memcmp(data, &auxdata, datalen) != 0) + return FSCACHE_CHECKAUX_OBSOLETE; + + return FSCACHE_CHECKAUX_OKAY; +} + +/* + * the primary index for each server is simply made up of a series of NFS file + * handles + */ +struct fscache_cookie_def nfs_cache_fh_index_def = { + .name = "NFS.fh", + .type = FSCACHE_COOKIE_TYPE_DATAFILE, + .get_key = nfs_fh_get_key, + .get_attr = nfs_fh_get_attr, + .get_aux = nfs_fh_get_aux, + .check_aux = nfs_fh_check_aux, + .get_context = nfs_fh_get_context, + .put_context = nfs_fh_put_context, + .mark_pages_cached = nfs_fh_mark_pages_cached, + .now_uncached = nfs_fh_now_uncached, +}; + +static int nfs_file_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + wait_on_page_fs_misc(page); + return 0; +} + +struct vm_operations_struct nfs_fs_vm_operations = { + .nopage = filemap_nopage, + .populate = filemap_populate, + .page_mkwrite = nfs_file_page_mkwrite, +}; + +/* + * handle completion of a page being stored in the cache + */ +void nfs_readpage_to_fscache_complete(struct page *page, void *data, int error) +{ + dfprintk(FSCACHE, + "NFS: readpage_to_fscache_complete (p:%p(i:%lx f:%lx)/%d)\n", + page, page->index, page->flags, error); + + end_page_fs_misc(page); +} + +/* + * handle completion of a page being read from the cache + * - called in process (keventd) context + */ 
+void nfs_readpage_from_fscache_complete(struct page *page, + void *context, + int error) +{ + dfprintk(FSCACHE, + "NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n", + page, context, error); + + /* if the read completes with an error, we just unlock the page and let + * the VM reissue the readpage */ + if (!error) { + SetPageUptodate(page); + unlock_page(page); + } else { + error = nfs_readpage_async(context, page->mapping->host, page); + if (error) + unlock_page(page); + } +} + +/* + * handle completion of a page being read from the cache + * - really need to synchronise the end of writeback, probably using a page + * flag, but for the moment we disable caching on writable files + */ +void nfs_writepage_to_fscache_complete(struct page *page, + void *data, + int error) +{ +} diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h new file mode 100644 index 0000000..69f0f40 --- /dev/null +++ b/fs/nfs/fscache.h @@ -0,0 +1,466 @@ +/* fscache.h: NFS filesystem cache interface definitions + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _NFS_FSCACHE_H +#define _NFS_FSCACHE_H + +#include +#include +#include + +#ifdef CONFIG_NFS_FSCACHE +#include + +extern struct fscache_netfs nfs_cache_netfs; +extern struct fscache_cookie_def nfs_cache_server_index_def; +extern struct fscache_cookie_def nfs_cache_fh_index_def; +extern struct vm_operations_struct nfs_fs_vm_operations; + +extern void nfs_invalidatepage(struct page *, unsigned long); +extern int nfs_releasepage(struct page *, gfp_t); + +extern atomic_t nfs_fscache_to_pages; +extern atomic_t nfs_fscache_from_pages; +extern atomic_t nfs_fscache_uncache_page; +extern int nfs_fscache_from_error; +extern int nfs_fscache_to_error; + +/* + * register NFS for caching + */ +static inline int nfs_fscache_register(void) +{ + return fscache_register_netfs(&nfs_cache_netfs); +} + +/* + * unregister NFS for caching + */ +static inline void nfs_fscache_unregister(void) +{ + fscache_unregister_netfs(&nfs_cache_netfs); +} + +/* + * get the per-client index cookie for an NFS client if the appropriate mount + * flag was set + * - we always try and get an index cookie for the client, but get filehandle + * cookies on a per-superblock basis, depending on the mount flags + */ +static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) +{ + /* create a cache index for looking up filehandles */ + clp->fscache = fscache_acquire_cookie(nfs_cache_netfs.primary_index, + &nfs_cache_server_index_def, + clp); + dfprintk(FSCACHE,"NFS: get client cookie (0x%p/0x%p)\n", + clp, clp->fscache); +} + +/* + * dispose of a per-client cookie + */ +static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) +{ + dfprintk(FSCACHE,"NFS: releasing client cookie (0x%p/0x%p)\n", + clp, clp->fscache); + + fscache_relinquish_cookie(clp->fscache, 0); + clp->fscache = NULL; +} + +/* + * indicate the client caching state as readable text + */ +static inline const char *nfs_server_fscache_state(struct nfs_server *server) +{ + if (server->nfs_client->fscache && (server->flags & NFS_MOUNT_FSCACHE)) + return "yes"; + return "no "; +} + +/* + * get the per-filehandle cookie for an NFS inode + */ +static inline void 
nfs_fscache_get_fh_cookie(struct super_block *sb, + struct nfs_inode *nfsi, + int maycache) +{ + nfsi->fscache = NULL; + if (maycache && (NFS_SB(sb)->flags & NFS_MOUNT_FSCACHE)) { + nfsi->fscache = fscache_acquire_cookie( + NFS_SB(sb)->nfs_client->fscache, + &nfs_cache_fh_index_def, + nfsi); + + fscache_set_i_size(nfsi->fscache, nfsi->vfs_inode.i_size); + + dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n", + sb, nfsi, nfsi->fscache); + } +} + +/* + * change the filesize associated with a per-filehandle cookie + */ +static inline void nfs_fscache_set_size(struct nfs_server *server, + struct nfs_inode *nfsi, + loff_t i_size) +{ + fscache_set_i_size(nfsi->fscache, i_size); +} + +/* + * replace a per-filehandle cookie due to revalidation detecting a file having + * changed on the server + */ +static inline void nfs_fscache_renew_fh_cookie(struct nfs_server *server, + struct nfs_inode *nfsi) +{ + struct fscache_cookie *old = nfsi->fscache; + + if (nfsi->fscache) { + /* retire the current fscache cache and get a new one */ + fscache_relinquish_cookie(nfsi->fscache, 1); + + nfsi->fscache = fscache_acquire_cookie( + server->nfs_client->fscache, + &nfs_cache_fh_index_def, + nfsi); + fscache_set_i_size(nfsi->fscache, nfsi->vfs_inode.i_size); + + dfprintk(FSCACHE, + "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n", + server, nfsi, old, nfsi->fscache); + } +} + +/* + * release a per-filehandle cookie + */ +static inline void nfs_fscache_release_fh_cookie(struct nfs_server *server, + struct nfs_inode *nfsi) +{ + dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", + nfsi, nfsi->fscache); + + fscache_relinquish_cookie(nfsi->fscache, 0); + nfsi->fscache = NULL; +} + +/* + * retire a per-filehandle cookie, destroying the data attached to it + */ +static inline void nfs_fscache_zap_fh_cookie(struct nfs_server *server, + struct nfs_inode *nfsi) +{ + dfprintk(FSCACHE,"NFS: zapping cookie (0x%p/0x%p)\n", + nfsi, nfsi->fscache); + + fscache_relinquish_cookie(nfsi->fscache, 1); + nfsi->fscache = NULL; +} + +/* + * turn off the cache with regard to a filehandle cookie if opened for writing, + * invalidating all the pages in the page cache relating to the associated + * inode to clear the per-page caching + */ +static inline void nfs_fscache_disable_fh_cookie(struct inode *inode) +{ + if (NFS_I(inode)->fscache) { + dfprintk(FSCACHE, + "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); + + /* Need to invalided any mapped pages that were read in before + * turning off the cache. 
+ */ + if (inode->i_mapping && inode->i_mapping->nrpages) + invalidate_inode_pages2(inode->i_mapping); + + nfs_fscache_zap_fh_cookie(NFS_SERVER(inode), NFS_I(inode)); + } +} + +/* + * install the VM ops for mmap() of an NFS file so that we can hold up writes + * to pages on shared writable mappings until the store to the cache is + * complete + */ +static inline void nfs_fscache_install_vm_ops(struct inode *inode, + struct vm_area_struct *vma) +{ + if (NFS_I(inode)->fscache) + vma->vm_ops = &nfs_fs_vm_operations; +} + +/* + * release the caching state associated with a page + */ +static void nfs_fscache_release_page(struct page *page) +{ + if (PageNfsCached(page)) { + struct nfs_inode *nfsi = NFS_I(page->mapping->host); + + BUG_ON(nfsi->fscache == NULL); + + dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", + nfsi->fscache, page, nfsi); + + wait_on_page_fs_misc(page); + fscache_uncache_page(nfsi->fscache, page); + atomic_inc(&nfs_fscache_uncache_page); + ClearPageNfsCached(page); + } +} + +/* + * release the caching state associated with a page if undergoing complete page + * invalidation + */ +static inline void nfs_fscache_invalidate_page(struct page *page, + struct inode *inode, + unsigned long offset) +{ + if (PageNfsCached(page)) { + struct nfs_inode *nfsi = NFS_I(page->mapping->host); + + BUG_ON(!nfsi->fscache); + + dfprintk(FSCACHE, + "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n", + nfsi->fscache, page, nfsi); + + if (offset == 0) { + BUG_ON(!PageLocked(page)); + if (!PageWriteback(page)) + nfs_fscache_release_page(page); + } + } +} + +/* + * store a newly fetched page in fscache + */ +extern void nfs_readpage_to_fscache_complete(struct page *, void *, int); + +static inline void nfs_readpage_to_fscache(struct inode *inode, + struct page *page, + int sync) +{ + int ret; + + if (PageNfsCached(page)) { + dfprintk(FSCACHE, + "NFS: " + "readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n", + NFS_I(inode)->fscache, page, page->index, page->flags, + sync); + + if (TestSetPageFsMisc(page)) + BUG(); + + ret = fscache_write_page(NFS_I(inode)->fscache, page, + nfs_readpage_to_fscache_complete, + NULL, GFP_KERNEL); + dfprintk(FSCACHE, + "NFS: " + "readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n", + page, page->index, page->flags, ret); + + if (ret != 0) { + fscache_uncache_page(NFS_I(inode)->fscache, page); + atomic_inc(&nfs_fscache_uncache_page); + ClearPageNfsCached(page); + end_page_fs_misc(page); + nfs_fscache_to_error = ret; + } else { + atomic_inc(&nfs_fscache_to_pages); + } + } +} + +/* + * retrieve a page from fscache + */ +extern void nfs_readpage_from_fscache_complete(struct page *, void *, int); + +static inline +int nfs_readpage_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct page *page) +{ + int ret; + + if (!NFS_I(inode)->fscache) + return 1; + + dfprintk(FSCACHE, + "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n", + NFS_I(inode)->fscache, page, page->index, page->flags, inode); + + ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache, + page, + nfs_readpage_from_fscache_complete, + ctx, + GFP_KERNEL); + + switch (ret) { + case 0: /* read BIO submitted (page in fscache) */ + dfprintk(FSCACHE, + "NFS: readpage_from_fscache: BIO submitted\n"); + atomic_inc(&nfs_fscache_from_pages); + return ret; + + case -ENOBUFS: /* inode not in cache */ + case -ENODATA: /* page not in cache */ + dfprintk(FSCACHE, + "NFS: readpage_from_fscache error %d\n", ret); + return 1; + + default: + dfprintk(FSCACHE, "NFS: readpage_from_fscache 
%d\n", ret); + nfs_fscache_from_error = ret; + } + return ret; +} + +/* + * retrieve a set of pages from fscache + */ +static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + int ret, npages = *nr_pages; + + if (!NFS_I(inode)->fscache) + return 1; + + dfprintk(FSCACHE, + "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n", + NFS_I(inode)->fscache, *nr_pages, inode); + + ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache, + mapping, pages, nr_pages, + nfs_readpage_from_fscache_complete, + ctx, + mapping_gfp_mask(mapping)); + + + switch (ret) { + case 0: /* read BIO submitted (page in fscache) */ + BUG_ON(!list_empty(pages)); + BUG_ON(*nr_pages != 0); + dfprintk(FSCACHE, + "NFS: nfs_getpages_from_fscache: BIO submitted\n"); + + atomic_add(npages, &nfs_fscache_from_pages); + return ret; + + case -ENOBUFS: /* inode not in cache */ + case -ENODATA: /* page not in cache */ + dfprintk(FSCACHE, + "NFS: nfs_getpages_from_fscache: no page: %d\n", ret); + return 1; + + default: + dfprintk(FSCACHE, + "NFS: nfs_getpages_from_fscache: ret %d\n", ret); + nfs_fscache_from_error = ret; + } + + return ret; +} + +/* + * store an updated page in fscache + */ +extern void nfs_writepage_to_fscache_complete(struct page *page, void *data, int error); + +static inline void nfs_writepage_to_fscache(struct inode *inode, + struct page *page) +{ + int error; + + if (PageNfsCached(page) && NFS_I(inode)->fscache) { + dfprintk(FSCACHE, + "NFS: writepage_to_fscache (0x%p/0x%p/0x%p)\n", + NFS_I(inode)->fscache, page, inode); + + error = fscache_write_page(NFS_I(inode)->fscache, page, + nfs_writepage_to_fscache_complete, + NULL, GFP_KERNEL); + if (error != 0) { + dfprintk(FSCACHE, + "NFS: fscache_write_page error %d\n", + error); + fscache_uncache_page(NFS_I(inode)->fscache, page); + } + } +} + +#else /* CONFIG_NFS_FSCACHE */ +static inline int nfs_fscache_register(void) { return 0; } +static inline void nfs_fscache_unregister(void) {} +static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {} +static inline void nfs4_fscache_get_client_cookie(struct nfs_client *clp) {} +static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} +static inline const char *nfs_server_fscache_state(struct nfs_server *server) { return "no "; } + +static inline void nfs_fscache_get_fh_cookie(struct super_block *sb, + struct nfs_inode *nfsi, + int maycache) +{ +} +static inline void nfs_fscache_set_size(struct nfs_server *server, + struct nfs_inode *nfsi, + loff_t i_size) +{ +} +static inline void nfs_fscache_release_fh_cookie(struct nfs_server *server, + struct nfs_inode *nfsi) +{ +} +static inline void nfs_fscache_zap_fh_cookie(struct nfs_server *server, struct nfs_inode *nfsi) {} +static inline void nfs_fscache_renew_fh_cookie(struct nfs_server *server, struct nfs_inode *nfsi) {} +static inline void nfs_fscache_disable_fh_cookie(struct inode *inode) {} +static inline void nfs_fscache_install_vm_ops(struct inode *inode, struct vm_area_struct *vma) {} +static inline void nfs_fscache_release_page(struct page *page) {} +static inline void nfs_fscache_invalidate_page(struct page *page, + struct inode *inode, + unsigned long offset) +{ +} +static inline void nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) {} +static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, struct page *page) +{ + return -ENOBUFS; +} +static 
inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, + struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + return -ENOBUFS; +} + +static inline void nfs_writepage_to_fscache(struct inode *inode, struct page *page) +{ + BUG_ON(PageNfsCached(page)); +} + +#endif /* CONFIG_NFS_FSCACHE */ +#endif /* _NFS_FSCACHE_H */ diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c new file mode 100644 index 0000000..977e590 --- /dev/null +++ b/fs/nfs/getroot.c @@ -0,0 +1,306 @@ +/* getroot.c: get the root dentry for an NFS mount + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "nfs4_fs.h" +#include "delegation.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT +#define NFS_PARANOIA 1 + +/* + * get an NFS2/NFS3 root dentry from the root filehandle + */ +struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) +{ + struct nfs_server *server = NFS_SB(sb); + struct nfs_fsinfo fsinfo; + struct nfs_fattr fattr; + struct dentry *mntroot; + struct inode *inode; + int error; + + /* create a dummy root dentry with dummy inode for this superblock */ + if (!sb->s_root) { + struct nfs_fh dummyfh; + struct dentry *root; + struct inode *iroot; + + memset(&dummyfh, 0, sizeof(dummyfh)); + memset(&fattr, 0, sizeof(fattr)); + nfs_fattr_init(&fattr); + fattr.valid = NFS_ATTR_FATTR; + fattr.type = NFDIR; + fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR; + fattr.nlink = 2; + + iroot = nfs_fhget(sb, &dummyfh, &fattr); + if (IS_ERR(iroot)) + return ERR_PTR(PTR_ERR(iroot)); + + root = d_alloc_root(iroot); + if (!root) { + iput(iroot); + return ERR_PTR(-ENOMEM); + } + + sb->s_root = root; + } + + /* get the actual root for this mount */ + fsinfo.fattr = &fattr; + + error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); + if (error < 0) { + dprintk("nfs_get_root: getattr error = %d\n", -error); + return ERR_PTR(error); + } + + inode = nfs_fhget(sb, mntfh, fsinfo.fattr); + if (IS_ERR(inode)) { + dprintk("nfs_get_root: get root inode failed\n"); + return ERR_PTR(PTR_ERR(inode)); + } + + /* root dentries normally start off anonymous and get spliced in later + * if the dentry tree reaches them; however if the dentry already + * exists, we'll pick it up at this point and use it as the root + */ + mntroot = d_alloc_anon(inode); + if (!mntroot) { + iput(inode); + dprintk("nfs_get_root: get root dentry failed\n"); + return ERR_PTR(-ENOMEM); + } + + if (!mntroot->d_op) + mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops; + + return mntroot; +} + +#ifdef CONFIG_NFS_V4 + +/* + * Do a simple pathwalk from the root FH of the server to the nominated target + * of the mountpoint + * - give error on symlinks + * - give error on ".." 
occurring in the path + * - follow traversals + */ +int nfs4_path_walk(struct nfs_server *server, + struct nfs_fh *mntfh, + const char *path) +{ + struct nfs_fsinfo fsinfo; + struct nfs_fattr fattr; + struct nfs_fh lastfh; + struct qstr name; + int ret; + //int referral_count = 0; + + dprintk("--> nfs4_path_walk(,,%s)\n", path); + + fsinfo.fattr = &fattr; + nfs_fattr_init(&fattr); + + if (*path++ != '/') { + dprintk("nfs4_get_root: Path does not begin with a slash\n"); + return -EINVAL; + } + + /* Start by getting the root filehandle from the server */ + ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); + if (ret < 0) { + dprintk("nfs4_get_root: getroot error = %d\n", -ret); + return ret; + } + + if (fattr.type != NFDIR) { + printk(KERN_ERR "nfs4_get_root:" + " getroot encountered non-directory\n"); + return -ENOTDIR; + } + + if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) { + printk(KERN_ERR "nfs4_get_root:" + " getroot obtained referral\n"); + return -EREMOTE; + } + +next_component: + dprintk("Next: %s\n", path); + + /* extract the next bit of the path */ + if (!*path) + goto path_walk_complete; + + name.name = path; + while (*path && *path != '/') + path++; + name.len = path - (const char *) name.name; + +eat_dot_dir: + while (*path == '/') + path++; + + if (path[0] == '.' && (path[1] == '/' || !path[1])) { + path += 2; + goto eat_dot_dir; + } + + if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2]) + ) { + printk(KERN_ERR "nfs4_get_root:" + " Mount path contains reference to \"..\"\n"); + return -EINVAL; + } + + /* lookup the next FH in the sequence */ + memcpy(&lastfh, mntfh, sizeof(lastfh)); + + dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path); + + ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name, + mntfh, &fattr); + if (ret < 0) { + dprintk("nfs4_get_root: getroot error = %d\n", -ret); + return ret; + } + + if (fattr.type != NFDIR) { + printk(KERN_ERR "nfs4_get_root:" + " lookupfh encountered non-directory\n"); + return -ENOTDIR; + } + + if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) { + printk(KERN_ERR "nfs4_get_root:" + " lookupfh obtained referral\n"); + return -EREMOTE; + } + + goto next_component; + +path_walk_complete: + memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid)); + dprintk("<-- nfs4_path_walk() = 0\n"); + return 0; +} + +/* + * get an NFS4 root dentry from the root filehandle + */ +struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) +{ + struct nfs_server *server = NFS_SB(sb); + struct nfs_fattr fattr; + struct dentry *mntroot; + struct inode *inode; + int error; + + dprintk("--> nfs4_get_root()\n"); + + /* create a dummy root dentry with dummy inode for this superblock */ + if (!sb->s_root) { + struct nfs_fh dummyfh; + struct dentry *root; + struct inode *iroot; + + memset(&dummyfh, 0, sizeof(dummyfh)); + memset(&fattr, 0, sizeof(fattr)); + nfs_fattr_init(&fattr); + fattr.valid = NFS_ATTR_FATTR; + fattr.type = NFDIR; + fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR; + fattr.nlink = 2; + + iroot = nfs_fhget(sb, &dummyfh, &fattr); + if (IS_ERR(iroot)) + return ERR_PTR(PTR_ERR(iroot)); + + root = d_alloc_root(iroot); + if (!root) { + iput(iroot); + return ERR_PTR(-ENOMEM); + } + + sb->s_root = root; + } + + /* get the info about the server and filesystem */ + error = nfs4_server_capabilities(server, mntfh); + if (error < 0) { + dprintk("nfs_get_root: getcaps error = %d\n", + -error); + return ERR_PTR(error); + } + + /* get the actual root for this mount */ + error = 
server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); + if (error < 0) { + dprintk("nfs_get_root: getattr error = %d\n", -error); + return ERR_PTR(error); + } + + inode = nfs_fhget(sb, mntfh, &fattr); + if (IS_ERR(inode)) { + dprintk("nfs_get_root: get root inode failed\n"); + return ERR_PTR(PTR_ERR(inode)); + } + + /* root dentries normally start off anonymous and get spliced in later + * if the dentry tree reaches them; however if the dentry already + * exists, we'll pick it up at this point and use it as the root + */ + mntroot = d_alloc_anon(inode); + if (!mntroot) { + iput(inode); + dprintk("nfs_get_root: get root dentry failed\n"); + return ERR_PTR(-ENOMEM); + } + + if (!mntroot->d_op) + mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops; + + dprintk("<-- nfs4_get_root()\n"); + return mntroot; +} + +#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b81e7ed..7cd17dc 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -57,6 +57,20 @@ #define IDMAP_HASH_SZ 128 /* Default cache timeout is 10 minutes */ unsigned int nfs_idmap_cache_timeout = 600 * HZ; +static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + int jif = num * HZ; + if (endp == val || *endp || num < 0 || jif < num) + return -EINVAL; + *((int *)kp->arg) = jif; + return 0; +} + +module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, + &nfs_idmap_cache_timeout, 0644); + struct idmap_hashent { unsigned long ih_expires; __u32 ih_id; @@ -94,15 +108,16 @@ static struct rpc_pipe_ops idmap_upcall_ .destroy_msg = idmap_pipe_destroy_msg, }; -void -nfs_idmap_new(struct nfs4_client *clp) +int +nfs_idmap_new(struct nfs_client *clp) { struct idmap *idmap; + int error; + + BUG_ON(clp->cl_idmap != NULL); - if (clp->cl_idmap != NULL) - return; if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) - return; + return -ENOMEM; snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), "%s/idmap", clp->cl_rpcclient->cl_pathname); @@ -110,8 +125,9 @@ nfs_idmap_new(struct nfs4_client *clp) idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, idmap, &idmap_upcall_ops, 0); if (IS_ERR(idmap->idmap_dentry)) { + error = PTR_ERR(idmap->idmap_dentry); kfree(idmap); - return; + return error; } mutex_init(&idmap->idmap_lock); @@ -121,10 +137,11 @@ nfs_idmap_new(struct nfs4_client *clp) idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; clp->cl_idmap = idmap; + return 0; } void -nfs_idmap_delete(struct nfs4_client *clp) +nfs_idmap_delete(struct nfs_client *clp) { struct idmap *idmap = clp->cl_idmap; @@ -479,27 +496,27 @@ static unsigned int fnvhash32(const void return (hash); } -int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) +int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); } -int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) +int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); } -int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf) +int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, 
buf); } -int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf) +int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf) { struct idmap *idmap = clp->cl_idmap; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d349fb2..dcf916b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -88,6 +88,8 @@ void nfs_clear_inode(struct inode *inode cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); + + nfs_fscache_release_fh_cookie(NFS_SERVER(inode), nfsi); BUG_ON(atomic_read(&nfsi->data_updates) != 0); } @@ -134,6 +136,8 @@ void nfs_zap_caches(struct inode *inode) spin_lock(&inode->i_lock); nfs_zap_caches_locked(inode); spin_unlock(&inode->i_lock); + + nfs_fscache_zap_fh_cookie(NFS_SERVER(inode), NFS_I(inode)); } static void nfs_zap_acl_cache(struct inode *inode) @@ -212,6 +216,7 @@ nfs_fhget(struct super_block *sb, struct }; struct inode *inode = ERR_PTR(-ENOENT); unsigned long hash; + int maycache = 1; if ((fattr->valid & NFS_ATTR_FATTR) == 0) goto out_no_inode; @@ -242,13 +247,13 @@ nfs_fhget(struct super_block *sb, struct /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ - inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; + inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &nfs_file_operations; inode->i_data.a_ops = &nfs_file_aops; inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops; + inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) @@ -260,6 +265,7 @@ nfs_fhget(struct super_block *sb, struct else inode->i_op = &nfs_mountpoint_inode_operations; inode->i_fop = NULL; + maycache = 0; } } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; @@ -292,6 +298,8 @@ nfs_fhget(struct super_block *sb, struct memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); nfsi->cache_access.cred = NULL; + nfs_fscache_get_fh_cookie(sb, nfsi, maycache); + unlock_new_inode(inode); } else nfs_refresh_inode(inode, fattr); @@ -374,6 +382,7 @@ void nfs_setattr_update_inode(struct ino if ((attr->ia_valid & ATTR_SIZE) != 0) { nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); inode->i_size = attr->ia_size; + nfs_fscache_set_size(NFS_SERVER(inode), NFS_I(inode), inode->i_size); vmtruncate(inode, attr->ia_size); } } @@ -556,6 +565,8 @@ int nfs_open(struct inode *inode, struct ctx->mode = filp->f_mode; nfs_file_set_open_context(filp, ctx); put_nfs_open_context(ctx); + if ((filp->f_flags & O_ACCMODE) != O_RDONLY) + nfs_fscache_disable_fh_cookie(inode); return 0; } @@ -694,6 +705,8 @@ int nfs_revalidate_mapping(struct inode } spin_unlock(&inode->i_lock); + nfs_fscache_renew_fh_cookie(NFS_SERVER(inode), nfsi); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -927,11 +940,13 @@ static int nfs_update_inode(struct inode if (data_stable) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_DATA; + nfs_fscache_set_size(NFS_SERVER(inode), nfsi, inode->i_size); } invalid |= NFS_INO_INVALID_ATTR; } else if (new_isize > cur_isize) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + nfs_fscache_set_size(NFS_SERVER(inode), nfsi, inode->i_size); } nfsi->cache_change_attribute = jiffies; dprintk("NFS: isize change on 
server for file %s/%ld\n", @@ -1025,7 +1040,7 @@ #endif out_fileid: printk(KERN_ERR "NFS: server %s error: fileid changed\n" "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", - NFS_SERVER(inode)->hostname, inode->i_sb->s_id, + NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id, (long long)nfsi->fileid, (long long)fattr->fileid); goto out_err; } @@ -1144,6 +1159,14 @@ static int __init init_nfs_fs(void) { int err; + err = nfs_fscache_register(); + if (err < 0) + goto out6; + + err = nfs_fs_proc_init(); + if (err) + goto out5; + err = nfs_init_nfspagecache(); if (err) goto out4; @@ -1184,6 +1207,10 @@ out2: out3: nfs_destroy_nfspagecache(); out4: + nfs_fs_proc_exit(); +out5: + nfs_fscache_unregister(); +out6: return err; } @@ -1194,10 +1221,12 @@ static void __exit exit_nfs_fs(void) nfs_destroy_readpagecache(); nfs_destroy_inodecache(); nfs_destroy_nfspagecache(); + nfs_fscache_unregister(); #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif unregister_nfs_fs(); + nfs_fs_proc_exit(); } /* Not quite true; I just maintain it */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e4f4e5d..18febb4 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -4,6 +4,42 @@ #include +#define NFS_PAGE_WRITING 0 +#define NFS_PAGE_CACHED 1 + +#define PageNfsBit(bit, page) test_bit(bit, &(page)->private) + +#define SetPageNfsBit(bit, page) \ +do { \ + SetPagePrivate((page)); \ + set_bit(bit, &(page)->private); \ +} while(0) + +#define ClearPageNfsBit(bit, page) \ +do { \ + clear_bit(bit, &(page)->private); \ +} while(0) + +#define PageNfsWriting(page) PageNfsBit(NFS_PAGE_WRITING, (page)) +#define SetPageNfsWriting(page) SetPageNfsBit(NFS_PAGE_WRITING, (page)) +#define ClearPageNfsWriting(page) ClearPageNfsBit(NFS_PAGE_WRITING, (page)) + +#define PageNfsCached(page) PageNfsBit(NFS_PAGE_CACHED, (page)) +#define SetPageNfsCached(page) SetPageNfsBit(NFS_PAGE_CACHED, (page)) +#define ClearPageNfsCached(page) ClearPageNfsBit(NFS_PAGE_CACHED, (page)) + +struct nfs_string; +struct nfs_mount_data; +struct nfs4_mount_data; + +/* Maximum number of readahead requests + * FIXME: this should really be a sysctl so that users may tune it to suit + * their needs. People that do NFS over a slow network, might for + * instance want to reduce it to something closer to 1 for improved + * interactive response. 
+ */ +#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) + struct nfs_clone_mount { const struct super_block *sb; const struct dentry *dentry; @@ -15,7 +51,45 @@ struct nfs_clone_mount { rpc_authflavor_t authflavor; }; -/* namespace-nfs4.c */ +/* + * include filesystem caching stuff here + */ +#include "fscache.h" + +/* client.c */ +extern struct rpc_program nfs_program; + +extern void nfs_put_client(struct nfs_client *); +extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); +extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *, + struct nfs_fh *); +extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *, + const char *, + const struct sockaddr_in *, + const char *, + const char *, + rpc_authflavor_t, + struct nfs_fh *); +extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, + struct nfs_fh *); +extern void nfs_free_server(struct nfs_server *server); +extern struct nfs_server *nfs_clone_server(struct nfs_server *, + struct nfs_fh *, + struct nfs_fattr *); +#ifdef CONFIG_PROC_FS +extern int __init nfs_fs_proc_init(void); +extern void nfs_fs_proc_exit(void); +#else +static inline int nfs_fs_proc_init(void) +{ + return 0; +} +static inline void nfs_fs_proc_exit(void) +{ +} +#endif + +/* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry); #else @@ -46,6 +120,7 @@ #define nfs_destroy_directcache() do {} #endif /* nfs2xdr.c */ +extern int nfs_stat_to_errno(int); extern struct rpc_procinfo nfs_procedures[]; extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); @@ -54,8 +129,9 @@ extern struct rpc_procinfo nfs3_procedur extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); /* nfs4xdr.c */ -extern int nfs_stat_to_errno(int); +#ifdef CONFIG_NFS_V4 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); +#endif /* nfs4proc.c */ #ifdef CONFIG_NFS_V4 @@ -76,10 +152,10 @@ extern void nfs4_clear_inode(struct inod #endif /* super.c */ -extern struct file_system_type nfs_referral_nfs4_fs_type; -extern struct file_system_type clone_nfs_fs_type; +extern struct file_system_type nfs_xdev_fs_type; #ifdef CONFIG_NFS_V4 -extern struct file_system_type clone_nfs4_fs_type; +extern struct file_system_type nfs4_xdev_fs_type; +extern struct file_system_type nfs4_referral_fs_type; #endif extern struct rpc_stat nfs_rpcstat; @@ -88,30 +164,33 @@ extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); /* namespace.c */ -extern char *nfs_path(const char *base, const struct dentry *dentry, +extern char *nfs_path(const char *base, + const struct dentry *droot, + const struct dentry *dentry, char *buffer, ssize_t buflen); -/* - * Determine the mount path as a string - */ -static inline char * -nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen) -{ +/* getroot.c */ +extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *); #ifdef CONFIG_NFS_V4 - return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen); -#else - return NULL; +extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *); + +extern int nfs4_path_walk(struct nfs_server *server, + struct nfs_fh *mntfh, + const char *path); #endif -} + +/* read.c */ +extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, struct page *); /* * Determine the device name as a string */ static inline char *nfs_devname(const struct vfsmount *mnt_parent, - const struct 
dentry *dentry, - char *buffer, ssize_t buflen) + const struct dentry *dentry, + char *buffer, ssize_t buflen) { - return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen); + return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root, + dentry, buffer, buflen); } /* @@ -167,20 +246,3 @@ void nfs_super_set_maxbytes(struct super if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) sb->s_maxbytes = MAX_LFS_FILESIZE; } - -/* - * Check if the string represents a "valid" IPv4 address - */ -static inline int valid_ipaddr4(const char *buf) -{ - int rc, count, in[4]; - - rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); - if (rc != 4) - return -EINVAL; - for (count = 0; count < 4; count++) { - if (in[count] > 255) - return -EINVAL; - } - return 0; -} diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 19b98ca..271a95d 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -2,6 +2,7 @@ * linux/fs/nfs/namespace.c * * Copyright (C) 2005 Trond Myklebust + * - Modified by David Howells * * NFS namespace */ @@ -28,6 +29,7 @@ int nfs_mountpoint_expiry_timeout = 500 /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - arbitrary string to prepend to the path + * @droot - pointer to root dentry for mountpoint * @dentry - pointer to dentry * @buffer - result buffer * @buflen - length of buffer @@ -38,7 +40,9 @@ int nfs_mountpoint_expiry_timeout = 500 * This is mainly for use in figuring out the path on the * server side when automounting on top of an existing partition. */ -char *nfs_path(const char *base, const struct dentry *dentry, +char *nfs_path(const char *base, + const struct dentry *droot, + const struct dentry *dentry, char *buffer, ssize_t buflen) { char *end = buffer+buflen; @@ -47,7 +51,7 @@ char *nfs_path(const char *base, const s *--end = '\0'; buflen--; spin_lock(&dcache_lock); - while (!IS_ROOT(dentry)) { + while (!IS_ROOT(dentry) && dentry != droot) { namelen = dentry->d_name.len; buflen -= namelen + 1; if (buflen < 0) @@ -94,15 +98,17 @@ static void * nfs_follow_mountpoint(stru struct nfs_fattr fattr; int err; + dprintk("--> nfs_follow_mountpoint()\n"); + BUG_ON(IS_ROOT(dentry)); dprintk("%s: enter\n", __FUNCTION__); dput(nd->dentry); nd->dentry = dget(dentry); - if (d_mountpoint(nd->dentry)) - goto out_follow; + /* Look it up again */ parent = dget_parent(nd->dentry); - err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr); + err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, + &fh, &fattr); dput(parent); if (err != 0) goto out_err; @@ -130,6 +136,8 @@ static void * nfs_follow_mountpoint(stru schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); out: dprintk("%s: done, returned %d\n", __FUNCTION__, err); + + dprintk("<-- nfs_follow_mountpoint() = %d\n", err); return ERR_PTR(err); out_err: path_release(nd); @@ -170,22 +178,23 @@ void nfs_release_automount_timer(void) /* * Clone a mountpoint of the appropriate type */ -static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname, +static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, + const char *devname, struct nfs_clone_mount *mountdata) { #ifdef CONFIG_NFS_V4 struct vfsmount *mnt = NULL; - switch (server->rpc_ops->version) { + switch (server->nfs_client->rpc_ops->version) { case 2: case 3: - mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); + mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); break; case 4: - mnt = 
vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata); + mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata); } return mnt; #else - return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); + return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); #endif } @@ -211,6 +220,8 @@ struct vfsmount *nfs_do_submount(const s char *page = (char *) __get_free_page(GFP_USER); char *devname; + dprintk("--> nfs_do_submount()\n"); + dprintk("%s: submounting on %s/%s\n", __FUNCTION__, dentry->d_parent->d_name.name, dentry->d_name.name); @@ -225,5 +236,7 @@ free_page: free_page((unsigned long)page); out: dprintk("%s: done\n", __FUNCTION__); + + dprintk("<-- nfs_do_submount() = %p\n", mnt); return mnt; } diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7143b1f..b29543d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -81,7 +81,7 @@ do_proc_get_root(struct rpc_clnt *client } /* - * Bare-bones access to getattr: this is for nfs_read_super. + * Bare-bones access to getattr: this is for nfs_get_root/nfs_get_sb */ static int nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, @@ -90,8 +90,8 @@ nfs3_proc_get_root(struct nfs_server *se int status; status = do_proc_get_root(server->client, fhandle, info); - if (status && server->client_sys != server->client) - status = do_proc_get_root(server->client_sys, fhandle, info); + if (status && server->nfs_client->cl_rpcclient != server->client) + status = do_proc_get_root(server->nfs_client->cl_rpcclient, fhandle, info); return status; } @@ -785,7 +785,7 @@ nfs3_proc_fsinfo(struct nfs_server *serv dprintk("NFS call fsinfo\n"); nfs_fattr_init(info->fattr); - status = rpc_call_sync(server->client_sys, &msg, 0); + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); dprintk("NFS reply fsinfo: %d\n", status); return status; } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9a10286..61095fe 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -43,55 +43,6 @@ enum nfs4_client_state { }; /* - * The nfs4_client identifies our client state to the server. - */ -struct nfs4_client { - struct list_head cl_servers; /* Global list of servers */ - struct in_addr cl_addr; /* Server identifier */ - u64 cl_clientid; /* constant */ - nfs4_verifier cl_confirm; - unsigned long cl_state; - - u32 cl_lockowner_id; - - /* - * The following rwsem ensures exclusive access to the server - * while we recover the state following a lease expiration. - */ - struct rw_semaphore cl_sem; - - struct list_head cl_delegations; - struct list_head cl_state_owners; - struct list_head cl_unused; - int cl_nunused; - spinlock_t cl_lock; - atomic_t cl_count; - - struct rpc_clnt * cl_rpcclient; - - struct list_head cl_superblocks; /* List of nfs_server structs */ - - unsigned long cl_lease_time; - unsigned long cl_last_renewal; - struct work_struct cl_renewd; - struct work_struct cl_recoverd; - - struct rpc_wait_queue cl_rpcwaitq; - - /* used for the setclientid verifier */ - struct timespec cl_boot_time; - - /* idmapper */ - struct idmap * cl_idmap; - - /* Our own IP address, as a null-terminated string. - * This is used to generate the clientid, and the callback address. - */ - char cl_ipaddr[16]; - unsigned char cl_id_uniquifier; -}; - -/* * struct rpc_sequence ensures that RPC calls are sent in the exact * order that they appear on the list. 
*/ @@ -127,7 +78,7 @@ static inline void nfs_confirm_seqid(str struct nfs4_state_owner { spinlock_t so_lock; struct list_head so_list; /* per-clientid list of state_owners */ - struct nfs4_client *so_client; + struct nfs_client *so_client; u32 so_id; /* 32-bit identifier, unique */ atomic_t so_count; @@ -210,10 +161,10 @@ extern ssize_t nfs4_listxattr(struct den /* nfs4proc.c */ extern int nfs4_map_errors(int err); -extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short, struct rpc_cred *); -extern int nfs4_proc_setclientid_confirm(struct nfs4_client *, struct rpc_cred *); -extern int nfs4_proc_async_renew(struct nfs4_client *, struct rpc_cred *); -extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *); +extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *); +extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); +extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); +extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); @@ -231,19 +182,14 @@ extern const u32 nfs4_fsinfo_bitmap[2]; extern const u32 nfs4_fs_locations_bitmap[2]; /* nfs4renewd.c */ -extern void nfs4_schedule_state_renewal(struct nfs4_client *); +extern void nfs4_schedule_state_renewal(struct nfs_client *); extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); -extern void nfs4_kill_renewd(struct nfs4_client *); +extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(void *); /* nfs4state.c */ -extern void init_nfsv4_state(struct nfs_server *); -extern void destroy_nfsv4_state(struct nfs_server *); -extern struct nfs4_client *nfs4_get_client(struct in_addr *); -extern void nfs4_put_client(struct nfs4_client *clp); -extern struct nfs4_client *nfs4_find_client(struct in_addr *); -struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp); -extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); +struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); +extern u32 nfs4_alloc_lockowner_id(struct nfs_client *); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); @@ -252,7 +198,7 @@ extern struct nfs4_state * nfs4_get_open extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, mode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); -extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern void nfs4_schedule_state_recovery(struct nfs_client *); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); @@ -276,10 +222,6 @@ extern struct svc_version nfs4_callback_ #else -#define init_nfsv4_state(server) do { } while (0) -#define destroy_nfsv4_state(server) do { } while (0) -#define nfs4_put_state_owner(inode, owner) do { } while (0) -#define nfs4_put_open_state(state) do { } while (0) #define nfs4_close_state(a, b) do { } while (0) #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index ea38d27..24e47f3 100644 --- 
a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -2,6 +2,7 @@ * linux/fs/nfs/nfs4namespace.c * * Copyright (C) 2005 Trond Myklebust + * - Modified by David Howells * * NFSv4 namespace */ @@ -23,7 +24,7 @@ #define NFSDBG_FACILITY NFSDBG_VFS /* * Check if fs_root is valid */ -static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname, +static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname, char *buffer, ssize_t buflen) { char *end = buffer + buflen; @@ -34,7 +35,7 @@ static inline char *nfs4_pathname_string n = pathname->ncomponents; while (--n >= 0) { - struct nfs4_string *component = &pathname->components[n]; + const struct nfs4_string *component = &pathname->components[n]; buflen -= component->len + 1; if (buflen < 0) goto Elong; @@ -47,6 +48,68 @@ Elong: return ERR_PTR(-ENAMETOOLONG); } +/* + * Determine the mount path as a string + */ +static char *nfs4_path(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + const char *srvpath; + + srvpath = strchr(mnt_parent->mnt_devname, ':'); + if (srvpath) + srvpath++; + else + srvpath = mnt_parent->mnt_devname; + + return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen); +} + +/* + * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we + * believe to be the server path to this dentry + */ +static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + const struct nfs4_fs_locations *locations, + char *page, char *page2) +{ + const char *path, *fs_path; + + path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE); + if (IS_ERR(path)) + return PTR_ERR(path); + + fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); + if (IS_ERR(fs_path)) + return PTR_ERR(fs_path); + + if (strncmp(path, fs_path, strlen(fs_path)) != 0) { + dprintk("%s: path %s does not begin with fsroot %s\n", + __FUNCTION__, path, fs_path); + return -ENOENT; + } + + return 0; +} + +/* + * Check if the string represents a "valid" IPv4 address + */ +static inline int valid_ipaddr4(const char *buf) +{ + int rc, count, in[4]; + + rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); + if (rc != 4) + return -EINVAL; + for (count = 0; count < 4; count++) { + if (in[count] > 255) + return -EINVAL; + } + return 0; +} /** * nfs_follow_referral - set up mountpoint when hitting a referral on moved error @@ -60,7 +123,7 @@ Elong: */ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, const struct dentry *dentry, - struct nfs4_fs_locations *locations) + const struct nfs4_fs_locations *locations) { struct vfsmount *mnt = ERR_PTR(-ENOENT); struct nfs_clone_mount mountdata = { @@ -68,10 +131,9 @@ static struct vfsmount *nfs_follow_refer .dentry = dentry, .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, }; - char *page, *page2; - char *path, *fs_path; + char *page = NULL, *page2 = NULL; char *devname; - int loc, s; + int loc, s, error; if (locations == NULL || locations->nlocations <= 0) goto out; @@ -79,36 +141,30 @@ static struct vfsmount *nfs_follow_refer dprintk("%s: referral at %s/%s\n", __FUNCTION__, dentry->d_parent->d_name.name, dentry->d_name.name); - /* Ensure fs path is a prefix of current dentry path */ page = (char *) __get_free_page(GFP_USER); - if (page == NULL) + if (!page) goto out; + page2 = (char *) __get_free_page(GFP_USER); - if (page2 == NULL) + if (!page2) goto out; - path = nfs4_path(dentry, page, PAGE_SIZE); - if (IS_ERR(path)) - goto 
out_free; - - fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); - if (IS_ERR(fs_path)) - goto out_free; - - if (strncmp(path, fs_path, strlen(fs_path)) != 0) { - dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path); - goto out_free; + /* Ensure fs path is a prefix of current dentry path */ + error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2); + if (error < 0) { + mnt = ERR_PTR(error); + goto out; } devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); if (IS_ERR(devname)) { mnt = (struct vfsmount *)devname; - goto out_free; + goto out; } loc = 0; while (loc < locations->nlocations && IS_ERR(mnt)) { - struct nfs4_fs_location *location = &locations->locations[loc]; + const struct nfs4_fs_location *location = &locations->locations[loc]; char *mnt_path; if (location == NULL || location->nservers <= 0 || @@ -140,7 +196,7 @@ static struct vfsmount *nfs_follow_refer addr.sin_port = htons(NFS_PORT); mountdata.addr = &addr; - mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata); + mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, devname, &mountdata); if (!IS_ERR(mnt)) { break; } @@ -149,10 +205,9 @@ static struct vfsmount *nfs_follow_refer loc++; } -out_free: - free_page((unsigned long)page); - free_page((unsigned long)page2); out: + free_page((unsigned long) page); + free_page((unsigned long) page2); dprintk("%s: done\n", __FUNCTION__); return mnt; } @@ -165,7 +220,7 @@ out: */ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) { - struct vfsmount *mnt = ERR_PTR(-ENOENT); + struct vfsmount *mnt = ERR_PTR(-ENOMEM); struct dentry *parent; struct nfs4_fs_locations *fs_locations = NULL; struct page *page; @@ -183,11 +238,16 @@ struct vfsmount *nfs_do_refmount(const s goto out_free; /* Get locations */ + mnt = ERR_PTR(-ENOENT); + parent = dget_parent(dentry); - dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name); + dprintk("%s: getting locations for %s/%s\n", + __FUNCTION__, parent->d_name.name, dentry->d_name.name); + err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page); dput(parent); - if (err != 0 || fs_locations->nlocations <= 0 || + if (err != 0 || + fs_locations->nlocations <= 0 || fs_locations->fs_path.ncomponents <= 0) goto out_free; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e6ee97f..041146f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -64,7 +64,7 @@ static int nfs4_do_fsinfo(struct nfs_ser static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); -static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp); +static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); /* Prevent leaks of NFSv4 errors into userland */ int nfs4_map_errors(int err) @@ -195,7 +195,7 @@ static void nfs4_setup_readdir(u64 cooki static void renew_lease(const struct nfs_server *server, unsigned long timestamp) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; spin_lock(&clp->cl_lock); if (time_before(clp->cl_last_renewal,timestamp)) clp->cl_last_renewal = timestamp; @@ -252,7 +252,7 @@ static struct nfs4_opendata *nfs4_openda atomic_inc(&sp->so_count); p->o_arg.fh = NFS_FH(dir); 
p->o_arg.open_flags = flags, - p->o_arg.clientid = server->nfs4_state->cl_clientid; + p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id = sp->so_id; p->o_arg.name = &dentry->d_name; p->o_arg.server = server; @@ -550,7 +550,7 @@ int nfs4_open_delegation_recall(struct d case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: /* Don't recall a delegation if it was lost */ - nfs4_schedule_state_recovery(server->nfs4_state); + nfs4_schedule_state_recovery(server->nfs_client); return err; } err = nfs4_handle_exception(server, err, &exception); @@ -758,7 +758,7 @@ static int _nfs4_proc_open(struct nfs4_o } nfs_confirm_seqid(&data->owner->so_seqid, 0); if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) - return server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); + return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); return 0; } @@ -792,7 +792,7 @@ out: int nfs4_recover_expired_lease(struct nfs_server *server) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) nfs4_schedule_state_recovery(clp); @@ -867,7 +867,7 @@ static int _nfs4_open_delegated(struct i { struct nfs_delegation *delegation; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state_owner *sp = NULL; struct nfs4_state *state = NULL; @@ -953,7 +953,7 @@ static int _nfs4_do_open(struct inode *d struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs4_opendata *opendata; int status; @@ -1133,7 +1133,7 @@ static void nfs4_close_done(struct rpc_t break; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(server->nfs4_state); + nfs4_schedule_state_recovery(server->nfs_client); break; default: if (nfs4_async_handle_error(task, server) == -EAGAIN) { @@ -1268,7 +1268,7 @@ nfs4_atomic_open(struct inode *dir, stru BUG_ON(nd->intent.open.flags & O_CREAT); } - cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) return (struct dentry *)cred; state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); @@ -1291,7 +1291,7 @@ nfs4_open_revalidate(struct inode *dir, struct rpc_cred *cred; struct nfs4_state *state; - cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) return PTR_ERR(cred); state = nfs4_open_delegated(dentry->d_inode, openflags, cred); @@ -1393,70 +1393,19 @@ static int nfs4_lookup_root(struct nfs_s return err; } +/* + * get the file handle for the "/" directory on the server + */ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fsinfo *info) { - struct nfs_fattr * fattr = info->fattr; - unsigned char * p; - struct qstr q; - struct nfs4_lookup_arg args = { - .dir_fh = fhandle, - .name = &q, - .bitmask = nfs4_fattr_bitmap, - }; - struct nfs4_lookup_res res = { - .server = server, - .fattr = fattr, - .fh = fhandle, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], - .rpc_argp = &args, - .rpc_resp = &res, - }; int status; - /* - * Now we do a separate LOOKUP for each component 
of the mount path. - * The LOOKUPs are done separately so that we can conveniently - * catch an ERR_WRONGSEC if it occurs along the way... - */ status = nfs4_lookup_root(server, fhandle, info); - if (status) - goto out; - - p = server->mnt_path; - for (;;) { - struct nfs4_exception exception = { }; - - while (*p == '/') - p++; - if (!*p) - break; - q.name = p; - while (*p && (*p != '/')) - p++; - q.len = p - q.name; - - do { - nfs_fattr_init(fattr); - status = nfs4_handle_exception(server, - rpc_call_sync(server->client, &msg, 0), - &exception); - } while (exception.retry); - if (status == 0) - continue; - if (status == -ENOENT) { - printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path); - printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n"); - } - break; - } if (status == 0) status = nfs4_server_capabilities(server, fhandle); if (status == 0) status = nfs4_do_fsinfo(server, fhandle, info); -out: return nfs4_map_errors(status); } @@ -1565,7 +1514,7 @@ nfs4_proc_setattr(struct dentry *dentry, nfs_fattr_init(fattr); - cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); if (IS_ERR(cred)) return PTR_ERR(cred); @@ -1583,6 +1532,52 @@ nfs4_proc_setattr(struct dentry *dentry, return status; } +static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, + struct qstr *name, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + int status; + struct nfs4_lookup_arg args = { + .bitmask = server->attr_bitmask, + .dir_fh = dirfh, + .name = name, + }; + struct nfs4_lookup_res res = { + .server = server, + .fattr = fattr, + .fh = fhandle, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + nfs_fattr_init(fattr); + + dprintk("NFS call lookupfh %s\n", name->name); + status = rpc_call_sync(server->client, &msg, 0); + dprintk("NFS reply lookupfh: %d\n", status); + if (status == -NFS4ERR_MOVED) + status = -EREMOTE; + return status; +} + +static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, + struct qstr *name, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_proc_lookupfh(server, dirfh, name, + fhandle, fattr), + &exception); + } while (exception.retry); + return err; +} + static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { @@ -1881,7 +1876,7 @@ nfs4_proc_create(struct inode *dir, stru struct rpc_cred *cred; int status = 0; - cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) { status = PTR_ERR(cred); goto out; @@ -2521,7 +2516,7 @@ static void nfs4_proc_commit_setup(struc */ static void nfs4_renew_done(struct rpc_task *task, void *data) { - struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; + struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp; unsigned long timestamp = (unsigned long)data; if (task->tk_status < 0) { @@ -2543,7 +2538,7 @@ static const struct rpc_call_ops nfs4_re .rpc_call_done = nfs4_renew_done, }; -int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred) +int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -2555,7 +2550,7 @@ int 
nfs4_proc_async_renew(struct nfs4_cl &nfs4_renew_ops, (void *)jiffies); } -int nfs4_proc_renew(struct nfs4_client *clp, struct rpc_cred *cred) +int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -2757,7 +2752,7 @@ static int nfs4_proc_set_acl(struct inod return -EOPNOTSUPP; nfs_inode_return_delegation(inode); buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); - ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (ret == 0) nfs4_write_cached_acl(inode, buf, buflen); return ret; @@ -2766,7 +2761,7 @@ static int nfs4_proc_set_acl(struct inod static int nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; if (!clp || task->tk_status >= 0) return 0; @@ -2803,7 +2798,7 @@ static int nfs4_wait_bit_interruptible(v return 0; } -static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) +static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp) { sigset_t oldset; int res; @@ -2846,7 +2841,7 @@ static int nfs4_delay(struct rpc_clnt *c */ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; int ret = errorcode; exception->retry = 0; @@ -2873,7 +2868,7 @@ int nfs4_handle_exception(const struct n return nfs4_map_errors(ret); } -int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) +int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) { nfs4_verifier sc_verifier; struct nfs4_setclientid setclientid = { @@ -2897,7 +2892,7 @@ int nfs4_proc_setclientid(struct nfs4_cl for(;;) { setclientid.sc_name_len = scnprintf(setclientid.sc_name, sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u", - clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr), + clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr), cred->cr_ops->cr_name, clp->cl_id_uniquifier); setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, @@ -2920,7 +2915,7 @@ int nfs4_proc_setclientid(struct nfs4_cl return status; } -static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) +static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) { struct nfs_fsinfo fsinfo; struct rpc_message msg = { @@ -2944,7 +2939,7 @@ static int _nfs4_proc_setclientid_confir return status; } -int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) +int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) { long timeout; int err; @@ -3052,7 +3047,7 @@ int nfs4_proc_delegreturn(struct inode * switch (err) { case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(server->nfs4_state); + nfs4_schedule_state_recovery(server->nfs_client); case 0: return 0; } @@ -3081,7 +3076,7 @@ static int _nfs4_proc_getlk(struct nfs4_ { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs_lockt_args arg = { .fh = NFS_FH(inode), .fl = request, @@ -3206,7 +3201,7 @@ static void nfs4_locku_done(struct rpc_t break; case -NFS4ERR_STALE_STATEID: case 
-NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(calldata->server->nfs4_state); + nfs4_schedule_state_recovery(calldata->server->nfs_client); break; default: if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) { @@ -3318,7 +3313,7 @@ static struct nfs4_lockdata *nfs4_alloc_ if (p->arg.lock_seqid == NULL) goto out_free; p->arg.lock_stateid = &lsp->ls_stateid; - p->arg.lock_owner.clientid = server->nfs4_state->cl_clientid; + p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; p->arg.lock_owner.id = lsp->ls_id; p->lsp = lsp; atomic_inc(&lsp->ls_count); @@ -3488,7 +3483,7 @@ static int nfs4_lock_expired(struct nfs4 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs4_client *clp = state->owner->so_client; + struct nfs_client *clp = state->owner->so_client; unsigned char fl_flags = request->fl_flags; int status; @@ -3698,6 +3693,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, + .lookupfh = nfs4_proc_lookupfh, .lookup = nfs4_proc_lookup, .access = nfs4_proc_access, .readlink = nfs4_proc_readlink, @@ -3718,6 +3714,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .statfs = nfs4_proc_statfs, .fsinfo = nfs4_proc_fsinfo, .pathconf = nfs4_proc_pathconf, + .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, .read_done = nfs4_read_done, diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 5d764d8..f2c8936 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -61,7 +61,7 @@ #define NFSDBG_FACILITY NFSDBG_PROC void nfs4_renew_state(void *data) { - struct nfs4_client *clp = (struct nfs4_client *)data; + struct nfs_client *clp = (struct nfs_client *)data; struct rpc_cred *cred; long lease, timeout; unsigned long last, now; @@ -108,7 +108,7 @@ out: /* Must be called with clp->cl_sem locked for writes */ void -nfs4_schedule_state_renewal(struct nfs4_client *clp) +nfs4_schedule_state_renewal(struct nfs_client *clp) { long timeout; @@ -127,26 +127,13 @@ nfs4_schedule_state_renewal(struct nfs4_ void nfs4_renewd_prepare_shutdown(struct nfs_server *server) { - struct nfs4_client *clp = server->nfs4_state; - - if (!clp) - return; flush_scheduled_work(); - down_write(&clp->cl_sem); - if (!list_empty(&server->nfs4_siblings)) - list_del_init(&server->nfs4_siblings); - up_write(&clp->cl_sem); } -/* Must be called with clp->cl_sem locked for writes */ void -nfs4_kill_renewd(struct nfs4_client *clp) +nfs4_kill_renewd(struct nfs_client *clp) { down_read(&clp->cl_sem); - if (!list_empty(&clp->cl_superblocks)) { - up_read(&clp->cl_sem); - return; - } cancel_delayed_work(&clp->cl_renewd); up_read(&clp->cl_sem); flush_scheduled_work(); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 090a36b..5fffbdf 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -50,149 +50,15 @@ #include #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" +#include "internal.h" #define OPENOWNER_POOL_SIZE 8 const nfs4_stateid zero_stateid; -static DEFINE_SPINLOCK(state_spinlock); static LIST_HEAD(nfs4_clientid_list); -void -init_nfsv4_state(struct nfs_server *server) -{ - server->nfs4_state = NULL; - INIT_LIST_HEAD(&server->nfs4_siblings); -} - -void -destroy_nfsv4_state(struct nfs_server *server) -{ - kfree(server->mnt_path); - server->mnt_path = NULL; - if (server->nfs4_state) { - nfs4_put_client(server->nfs4_state); - server->nfs4_state = NULL; - } -} - -/* - * nfs4_get_client(): 
returns an empty client structure - * nfs4_put_client(): drops reference to client structure - * - * Since these are allocated/deallocated very rarely, we don't - * bother putting them in a slab cache... - */ -static struct nfs4_client * -nfs4_alloc_client(struct in_addr *addr) -{ - struct nfs4_client *clp; - - if (nfs_callback_up() < 0) - return NULL; - if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) { - nfs_callback_down(); - return NULL; - } - memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); - init_rwsem(&clp->cl_sem); - INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_state_owners); - INIT_LIST_HEAD(&clp->cl_unused); - spin_lock_init(&clp->cl_lock); - atomic_set(&clp->cl_count, 1); - INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); - INIT_LIST_HEAD(&clp->cl_superblocks); - rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); - clp->cl_rpcclient = ERR_PTR(-EINVAL); - clp->cl_boot_time = CURRENT_TIME; - clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; - return clp; -} - -static void -nfs4_free_client(struct nfs4_client *clp) -{ - struct nfs4_state_owner *sp; - - while (!list_empty(&clp->cl_unused)) { - sp = list_entry(clp->cl_unused.next, - struct nfs4_state_owner, - so_list); - list_del(&sp->so_list); - kfree(sp); - } - BUG_ON(!list_empty(&clp->cl_state_owners)); - nfs_idmap_delete(clp); - if (!IS_ERR(clp->cl_rpcclient)) - rpc_shutdown_client(clp->cl_rpcclient); - kfree(clp); - nfs_callback_down(); -} - -static struct nfs4_client *__nfs4_find_client(struct in_addr *addr) -{ - struct nfs4_client *clp; - list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) { - if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) { - atomic_inc(&clp->cl_count); - return clp; - } - } - return NULL; -} - -struct nfs4_client *nfs4_find_client(struct in_addr *addr) -{ - struct nfs4_client *clp; - spin_lock(&state_spinlock); - clp = __nfs4_find_client(addr); - spin_unlock(&state_spinlock); - return clp; -} - -struct nfs4_client * -nfs4_get_client(struct in_addr *addr) -{ - struct nfs4_client *clp, *new = NULL; - - spin_lock(&state_spinlock); - for (;;) { - clp = __nfs4_find_client(addr); - if (clp != NULL) - break; - clp = new; - if (clp != NULL) { - list_add(&clp->cl_servers, &nfs4_clientid_list); - new = NULL; - break; - } - spin_unlock(&state_spinlock); - new = nfs4_alloc_client(addr); - spin_lock(&state_spinlock); - if (new == NULL) - break; - } - spin_unlock(&state_spinlock); - if (new) - nfs4_free_client(new); - return clp; -} - -void -nfs4_put_client(struct nfs4_client *clp) -{ - if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock)) - return; - list_del(&clp->cl_servers); - spin_unlock(&state_spinlock); - BUG_ON(!list_empty(&clp->cl_superblocks)); - rpc_wake_up(&clp->cl_rpcwaitq); - nfs4_kill_renewd(clp); - nfs4_free_client(clp); -} - -static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred) +static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) { int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport, cred); @@ -204,13 +70,13 @@ static int nfs4_init_client(struct nfs4_ } u32 -nfs4_alloc_lockowner_id(struct nfs4_client *clp) +nfs4_alloc_lockowner_id(struct nfs_client *clp) { return clp->cl_lockowner_id ++; } static struct nfs4_state_owner * -nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred) +nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred) { struct nfs4_state_owner *sp = NULL; @@ -224,7 +90,7 @@ nfs4_client_grab_unused(struct nfs4_clie return sp; } -struct 
rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp) +struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; struct rpc_cred *cred = NULL; @@ -238,7 +104,7 @@ struct rpc_cred *nfs4_get_renew_cred(str return cred; } -struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp) +struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; @@ -251,7 +117,7 @@ struct rpc_cred *nfs4_get_setclientid_cr } static struct nfs4_state_owner * -nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred) +nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred) { struct nfs4_state_owner *sp, *res = NULL; @@ -294,7 +160,7 @@ nfs4_alloc_state_owner(void) void nfs4_drop_state_owner(struct nfs4_state_owner *sp) { - struct nfs4_client *clp = sp->so_client; + struct nfs_client *clp = sp->so_client; spin_lock(&clp->cl_lock); list_del_init(&sp->so_list); spin_unlock(&clp->cl_lock); @@ -306,7 +172,7 @@ nfs4_drop_state_owner(struct nfs4_state_ */ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp, *new; get_rpccred(cred); @@ -337,7 +203,7 @@ struct nfs4_state_owner *nfs4_get_state_ */ void nfs4_put_state_owner(struct nfs4_state_owner *sp) { - struct nfs4_client *clp = sp->so_client; + struct nfs_client *clp = sp->so_client; struct rpc_cred *cred = sp->so_cred; if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) @@ -540,7 +406,7 @@ __nfs4_find_lock_state(struct nfs4_state static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *lsp; - struct nfs4_client *clp = state->owner->so_client; + struct nfs_client *clp = state->owner->so_client; lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); if (lsp == NULL) @@ -752,7 +618,7 @@ out: static int reclaimer(void *); -static inline void nfs4_clear_recover_bit(struct nfs4_client *clp) +static inline void nfs4_clear_recover_bit(struct nfs_client *clp) { smp_mb__before_clear_bit(); clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state); @@ -764,25 +630,25 @@ static inline void nfs4_clear_recover_bi /* * State recovery routine */ -static void nfs4_recover_state(struct nfs4_client *clp) +static void nfs4_recover_state(struct nfs_client *clp) { struct task_struct *task; __module_get(THIS_MODULE); atomic_inc(&clp->cl_count); task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim", - NIPQUAD(clp->cl_addr)); + NIPQUAD(clp->cl_addr.sin_addr)); if (!IS_ERR(task)) return; nfs4_clear_recover_bit(clp); - nfs4_put_client(clp); + nfs_put_client(clp); module_put(THIS_MODULE); } /* * Schedule a state recovery attempt */ -void nfs4_schedule_state_recovery(struct nfs4_client *clp) +void nfs4_schedule_state_recovery(struct nfs_client *clp) { if (!clp) return; @@ -879,7 +745,7 @@ out_err: return status; } -static void nfs4_state_mark_reclaim(struct nfs4_client *clp) +static void nfs4_state_mark_reclaim(struct nfs_client *clp) { struct nfs4_state_owner *sp; struct nfs4_state *state; @@ -903,7 +769,7 @@ static void nfs4_state_mark_reclaim(stru static int reclaimer(void *ptr) { - struct nfs4_client *clp = ptr; + struct nfs_client *clp = ptr; struct nfs4_state_owner *sp; struct nfs4_state_recovery_ops *ops; struct rpc_cred *cred; @@ -970,12 +836,12 @@ out: if (status == -NFS4ERR_CB_PATH_DOWN) nfs_handle_cb_pathdown(clp); nfs4_clear_recover_bit(clp); - nfs4_put_client(clp); 
+ nfs_put_client(clp); module_put_and_exit(0); return 0; out_error: printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", - NIPQUAD(clp->cl_addr.s_addr), -status); + NIPQUAD(clp->cl_addr.sin_addr), -status); set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); goto out; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1750d99..992a713 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -58,7 +58,7 @@ #define NFSDBG_FACILITY NFSDBG_XDR /* Mapping from NFS error code to "errno" error code. */ #define errno_NFSERR_IO EIO -static int nfs_stat_to_errno(int); +static int nfs4_stat_to_errno(int); /* NFSv4 COMPOUND tags are only wanted for debugging purposes */ #ifdef DEBUG @@ -529,7 +529,7 @@ static int encode_attrs(struct xdr_strea if (iap->ia_valid & ATTR_MODE) len += 4; if (iap->ia_valid & ATTR_UID) { - owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name); + owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); if (owner_namelen < 0) { printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n", iap->ia_uid); @@ -541,7 +541,7 @@ static int encode_attrs(struct xdr_strea len += 4 + (XDR_QUADLEN(owner_namelen) << 2); } if (iap->ia_valid & ATTR_GID) { - owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group); + owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); if (owner_grouplen < 0) { printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n", iap->ia_gid); @@ -1160,7 +1160,7 @@ static int encode_rename(struct xdr_stre return 0; } -static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid) +static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid) { uint32_t *p; @@ -1246,7 +1246,7 @@ static int encode_setclientid(struct xdr return 0; } -static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state) +static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state) { uint32_t *p; @@ -1945,7 +1945,7 @@ static int nfs4_xdr_enc_server_caps(stru /* * a RENEW request */ -static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) +static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp) { struct xdr_stream xdr; struct compound_hdr hdr = { @@ -1975,7 +1975,7 @@ static int nfs4_xdr_enc_setclientid(stru /* * a SETCLIENTID_CONFIRM request */ -static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) +static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp) { struct xdr_stream xdr; struct compound_hdr hdr = { @@ -2127,12 +2127,12 @@ static int decode_op_hdr(struct xdr_stre } READ32(nfserr); if (nfserr != NFS_OK) - return -nfs_stat_to_errno(nfserr); + return -nfs4_stat_to_errno(nfserr); return 0; } /* Dummy routine */ -static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp) +static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp) { uint32_t *p; unsigned int strlen; @@ -2636,7 +2636,7 @@ static int decode_attr_nlink(struct xdr_ return 0; } -static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid) +static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *uid) { uint32_t len, *p; @@ -2660,7 +2660,7 @@ 
static int decode_attr_owner(struct xdr_ return 0; } -static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid) +static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid) { uint32_t len, *p; @@ -3051,9 +3051,9 @@ static int decode_getfattr(struct xdr_st fattr->mode |= fmode; if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0) goto xdr_error; - if ((status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid)) != 0) + if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0) goto xdr_error; - if ((status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid)) != 0) + if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0) goto xdr_error; if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0) goto xdr_error; @@ -3254,7 +3254,7 @@ static int decode_delegation(struct xdr_ if (decode_space_limit(xdr, &res->maxsize) < 0) return -EIO; } - return decode_ace(xdr, NULL, res->server->nfs4_state); + return decode_ace(xdr, NULL, res->server->nfs_client); } static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) @@ -3564,7 +3564,7 @@ static int decode_setattr(struct xdr_str return 0; } -static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp) +static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) { uint32_t *p; uint32_t opnum; @@ -3597,7 +3597,7 @@ static int decode_setclientid(struct xdr READ_BUF(len); return -NFSERR_CLID_INUSE; } else - return -nfs_stat_to_errno(nfserr); + return -nfs4_stat_to_errno(nfserr); return 0; } @@ -4255,7 +4255,7 @@ static int nfs4_xdr_dec_fsinfo(struct rp if (!status) status = decode_fsinfo(&xdr, fsinfo); if (!status) - status = -nfs_stat_to_errno(hdr.status); + status = -nfs4_stat_to_errno(hdr.status); return status; } @@ -4334,7 +4334,7 @@ static int nfs4_xdr_dec_renew(struct rpc * a SETCLIENTID request */ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p, - struct nfs4_client *clp) + struct nfs_client *clp) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4345,7 +4345,7 @@ static int nfs4_xdr_dec_setclientid(stru if (!status) status = decode_setclientid(&xdr, clp); if (!status) - status = -nfs_stat_to_errno(hdr.status); + status = -nfs4_stat_to_errno(hdr.status); return status; } @@ -4367,7 +4367,7 @@ static int nfs4_xdr_dec_setclientid_conf if (!status) status = decode_fsinfo(&xdr, fsinfo); if (!status) - status = -nfs_stat_to_errno(hdr.status); + status = -nfs4_stat_to_errno(hdr.status); return status; } @@ -4520,7 +4520,7 @@ static struct { * This one is used jointly by NFSv2 and NFSv3. 
*/ static int -nfs_stat_to_errno(int stat) +nfs4_stat_to_errno(int stat) { int i; for (i = 0; nfs_errtbl[i].stat != -1; i++) { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 36e902a..c45f724 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -17,6 +17,7 @@ #include #include #include #include +#include "internal.h" #define NFS_PARANOIA 1 @@ -84,7 +85,7 @@ nfs_create_request(struct nfs_open_conte atomic_set(&req->wb_complete, 0); req->wb_index = page->index; page_cache_get(page); - BUG_ON(PagePrivate(page)); + BUG_ON(PageNfsWriting(page)); BUG_ON(!PageLocked(page)); BUG_ON(page->mapping->host != inode); req->wb_offset = offset; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index b3899ea..42a0558 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -66,14 +66,14 @@ nfs_proc_get_root(struct nfs_server *ser dprintk("%s: call getattr\n", __FUNCTION__); nfs_fattr_init(fattr); - status = rpc_call_sync(server->client_sys, &msg, 0); + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); if (status) return status; dprintk("%s: call statfs\n", __FUNCTION__); msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; msg.rpc_resp = &fsinfo; - status = rpc_call_sync(server->client_sys, &msg, 0); + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); dprintk("%s: reply statfs: %d\n", __FUNCTION__, status); if (status) return status; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 52bf634..13ff66a 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -26,11 +26,13 @@ #include #include #include #include +#include #include #include #include "iostat.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -162,7 +164,7 @@ static int nfs_readpage_sync(struct nfs_ rdata->args.offset = page_offset(page) + rdata->args.pgbase; dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n", - NFS_SERVER(inode)->hostname, + NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id, (long long)NFS_FILEID(inode), (unsigned long long)rdata->args.pgbase, @@ -200,13 +202,18 @@ static int nfs_readpage_sync(struct nfs_ SetPageUptodate(page); result = 0; + nfs_readpage_to_fscache(inode, page, 1); + unlock_page(page); + + return result; + io_error: unlock_page(page); nfs_readdata_free(rdata); return result; } -static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, +int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct page *page) { LIST_HEAD(one_request); @@ -231,6 +238,11 @@ static int nfs_readpage_async(struct nfs static void nfs_readpage_release(struct nfs_page *req) { + struct inode *d_inode = req->wb_context->dentry->d_inode; + + if (PageUptodate(req->wb_page)) + nfs_readpage_to_fscache(d_inode, req->wb_page, 0); + unlock_page(req->wb_page); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", @@ -613,6 +625,10 @@ int nfs_readpage(struct file *file, stru ctx = get_nfs_open_context((struct nfs_open_context *) file->private_data); if (!IS_SYNC(inode)) { + error = nfs_readpage_from_fscache(ctx, inode, page); + if (error == 0) + goto out; + error = nfs_readpage_async(ctx, inode, page); goto out; } @@ -643,6 +659,7 @@ readpage_async_filler(void *data, struct unsigned int len; nfs_wb_page(inode, page); + len = nfs_page_length(inode, page); if (len == 0) return nfs_return_empty_page(page); @@ -682,6 +699,17 @@ int nfs_readpages(struct file *filp, str } else desc.ctx = get_nfs_open_context((struct nfs_open_context *) filp->private_data); + + /* attempt to read as many of the pages as possible 
from the cache + * - this returns -ENOBUFS immediately if the cookie is negative + */ + ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping, + pages, &nr_pages); + if (ret == 0) { + put_nfs_open_context(desc.ctx); + return ret; /* all read */ + } + ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); if (!list_empty(&head)) { int err = nfs_pagein_list(&head, server->rpages); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e8a9bee..db4e072 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -13,6 +13,11 @@ * * Split from inode.c by David Howells * + * - superblocks are indexed on server only - all inodes, dentries, etc. associated with a + * particular server are held in the same superblock + * - NFS superblocks can have several effective roots to the dentry tree + * - directory type roots are spliced into the tree when a path from one root reaches the root + * of another (see nfs_lookup()) */ #include @@ -52,66 +57,12 @@ #include "internal.h" #define NFSDBG_FACILITY NFSDBG_VFS -/* Maximum number of readahead requests - * FIXME: this should really be a sysctl so that users may tune it to suit - * their needs. People that do NFS over a slow network, might for - * instance want to reduce it to something closer to 1 for improved - * interactive response. - */ -#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) - -/* - * RPC cruft for NFS - */ -static struct rpc_version * nfs_version[] = { - NULL, - NULL, - &nfs_version2, -#if defined(CONFIG_NFS_V3) - &nfs_version3, -#elif defined(CONFIG_NFS_V4) - NULL, -#endif -#if defined(CONFIG_NFS_V4) - &nfs_version4, -#endif -}; - -static struct rpc_program nfs_program = { - .name = "nfs", - .number = NFS_PROGRAM, - .nrvers = ARRAY_SIZE(nfs_version), - .version = nfs_version, - .stats = &nfs_rpcstat, - .pipe_dir_name = "/nfs", -}; - -struct rpc_stat nfs_rpcstat = { - .program = &nfs_program -}; - - -#ifdef CONFIG_NFS_V3_ACL -static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; -static struct rpc_version * nfsacl_version[] = { - [3] = &nfsacl_version3, -}; - -struct rpc_program nfsacl_program = { - .name = "nfsacl", - .number = NFS_ACL_PROGRAM, - .nrvers = ARRAY_SIZE(nfsacl_version), - .version = nfsacl_version, - .stats = &nfsacl_rpcstat, -}; -#endif /* CONFIG_NFS_V3_ACL */ - static void nfs_umount_begin(struct vfsmount *, int); static int nfs_statfs(struct dentry *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); static int nfs_show_stats(struct seq_file *, struct vfsmount *); static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); -static int nfs_clone_nfs_sb(struct file_system_type *fs_type, +static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs_kill_super(struct super_block *); @@ -123,10 +74,10 @@ static struct file_system_type nfs_fs_ty .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type clone_nfs_fs_type = { +struct file_system_type nfs_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs", - .get_sb = nfs_clone_nfs_sb, + .get_sb = nfs_xdev_get_sb, .kill_sb = nfs_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -145,10 +96,10 @@ static struct super_operations nfs_sops #ifdef CONFIG_NFS_V4 static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); -static int nfs_clone_nfs4_sb(struct file_system_type *fs_type, - int 
flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); -static int nfs_referral_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_xdev_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs4_kill_super(struct super_block *sb); static struct file_system_type nfs4_fs_type = { @@ -159,18 +110,18 @@ static struct file_system_type nfs4_fs_t .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type clone_nfs4_fs_type = { +struct file_system_type nfs4_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs4", - .get_sb = nfs_clone_nfs4_sb, + .get_sb = nfs4_xdev_get_sb, .kill_sb = nfs4_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type nfs_referral_nfs4_fs_type = { +struct file_system_type nfs4_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", - .get_sb = nfs_referral_nfs4_sb, + .get_sb = nfs4_referral_get_sb, .kill_sb = nfs4_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -187,40 +138,6 @@ static struct super_operations nfs4_sops }; #endif -#ifdef CONFIG_NFS_V4 -static const int nfs_set_port_min = 0; -static const int nfs_set_port_max = 65535; - -static int param_set_port(const char *val, struct kernel_param *kp) -{ - char *endp; - int num = simple_strtol(val, &endp, 0); - if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) - return -EINVAL; - *((int *)kp->arg) = num; - return 0; -} - -module_param_call(callback_tcpport, param_set_port, param_get_int, - &nfs_callback_set_tcpport, 0644); -#endif - -#ifdef CONFIG_NFS_V4 -static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) -{ - char *endp; - int num = simple_strtol(val, &endp, 0); - int jif = num * HZ; - if (endp == val || *endp || num < 0 || jif < num) - return -EINVAL; - *((int *)kp->arg) = jif; - return 0; -} - -module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, - &nfs_idmap_cache_timeout, 0644); -#endif - /* * Register the NFS filesystems */ @@ -269,11 +186,10 @@ #endif */ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct super_block *sb = dentry->d_sb; - struct nfs_server *server = NFS_SB(sb); + struct nfs_server *server = NFS_SB(dentry->d_sb); unsigned char blockbits; unsigned long blockres; - struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode); + struct nfs_fh *fh = NFS_FH(dentry->d_inode); struct nfs_fattr fattr; struct nfs_fsstat res = { .fattr = &fattr, @@ -282,7 +198,7 @@ static int nfs_statfs(struct dentry *den lock_kernel(); - error = server->rpc_ops->statfs(server, rootfh, &res); + error = server->nfs_client->rpc_ops->statfs(server, fh, &res); buf->f_type = NFS_SUPER_MAGIC; if (error < 0) goto out_err; @@ -292,7 +208,7 @@ static int nfs_statfs(struct dentry *den * case where f_frsize != f_bsize. Eventually we want to * report the value of wtmult in this field. */ - buf->f_frsize = sb->s_blocksize; + buf->f_frsize = dentry->d_sb->s_blocksize; /* * On most *nix systems, f_blocks, f_bfree, and f_bavail @@ -301,8 +217,8 @@ static int nfs_statfs(struct dentry *den * thus historically Linux's sys_statfs reports these * fields in units of f_bsize. 
*/ - buf->f_bsize = sb->s_blocksize; - blockbits = sb->s_blocksize_bits; + buf->f_bsize = dentry->d_sb->s_blocksize; + blockbits = dentry->d_sb->s_blocksize_bits; blockres = (1 << blockbits) - 1; buf->f_blocks = (res.tbytes + blockres) >> blockbits; buf->f_bfree = (res.fbytes + blockres) >> blockbits; @@ -323,9 +239,12 @@ static int nfs_statfs(struct dentry *den } +/* + * Map the security flavour number to a name + */ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) { - static struct { + static const struct { rpc_authflavor_t flavour; const char *str; } sec_flavours[] = { @@ -356,7 +275,7 @@ static const char *nfs_pseudoflavour_to_ */ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { - static struct proc_nfs_info { + static const struct proc_nfs_info { int flag; char *str; char *nostr; @@ -367,13 +286,15 @@ static void nfs_show_mount_options(struc { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, + { NFS_MOUNT_FSCACHE, ",fsc", "" }, { 0, NULL, NULL } }; - struct proc_nfs_info *nfs_infop; + const struct proc_nfs_info *nfs_infop; + struct nfs_client *clp = nfss->nfs_client; char buf[12]; char *proto; - seq_printf(m, ",vers=%d", nfss->rpc_ops->version); + seq_printf(m, ",vers=%d", clp->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); seq_printf(m, ",wsize=%d", nfss->wsize); if (nfss->acregmin != 3*HZ || showdefaults) @@ -402,8 +323,8 @@ static void nfs_show_mount_options(struc proto = buf; } seq_printf(m, ",proto=%s", proto); - seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); - seq_printf(m, ",retrans=%u", nfss->retrans_count); + seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ); + seq_printf(m, ",retrans=%u", clp->retrans_count); seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); } @@ -417,7 +338,7 @@ static int nfs_show_options(struct seq_f nfs_show_mount_options(m, nfss, 0); seq_puts(m, ",addr="); - seq_escape(m, nfss->hostname, " \t\n\\"); + seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\"); return 0; } @@ -454,7 +375,7 @@ static int nfs_show_stats(struct seq_fil seq_printf(m, ",namelen=%d", nfss->namelen); #ifdef CONFIG_NFS_V4 - if (nfss->rpc_ops->version == 4) { + if (nfss->nfs_client->rpc_ops->version == 4) { seq_printf(m, "\n\tnfsv4:\t"); seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); @@ -501,782 +422,351 @@ #endif /* * Begin unmount by attempting to remove all automounted mountpoints we added - * in response to traversals + * in response to xdev traversals and referrals */ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) { - struct nfs_server *server; - struct rpc_clnt *rpc; - shrink_submounts(vfsmnt, &nfs_automount_list); - if (!(flags & MNT_FORCE)) - return; - /* -EIO all pending I/O */ - server = NFS_SB(vfsmnt->mnt_sb); - rpc = server->client; - if (!IS_ERR(rpc)) - rpc_killall_tasks(rpc); - rpc = server->client_acl; - if (!IS_ERR(rpc)) - rpc_killall_tasks(rpc); } /* - * Obtain the root inode of the file system. 
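One user-visible effect of the new NFS_MOUNT_FSCACHE entry in the mount-option table above is that persistently cached mounts can be spotted in /proc/mounts. Purely as an illustration — the server name, export path and option values below are invented, and the exact ordering is whatever nfs_show_mount_options() emits — such a mount shows up along these lines:

	warthog:/export /mnt/nfs nfs rw,vers=3,rsize=32768,wsize=32768,hard,intr,fsc,proto=tcp,addr=warthog 0 0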
+ * Validate the NFS2/NFS3 mount data + * - fills in the mount root filehandle */ -static struct inode * -nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) +static int nfs_validate_mount_data(struct nfs_mount_data *data, + struct nfs_fh *mntfh) { - struct nfs_server *server = NFS_SB(sb); - int error; - - error = server->rpc_ops->getroot(server, rootfh, fsinfo); - if (error < 0) { - dprintk("nfs_get_root: getattr error = %d\n", -error); - return ERR_PTR(error); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + return -EINVAL; } - server->fsid = fsinfo->fattr->fsid; - return nfs_fhget(sb, rootfh, fsinfo->fattr); -} - -/* - * Do NFS version-independent mount processing, and sanity checking - */ -static int -nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) -{ - struct nfs_server *server; - struct inode *root_inode; - struct nfs_fattr fattr; - struct nfs_fsinfo fsinfo = { - .fattr = &fattr, - }; - struct nfs_pathconf pathinfo = { - .fattr = &fattr, - }; - int no_root_error = 0; - unsigned long max_rpc_payload; - - /* We probably want something more informative here */ - snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - - server = NFS_SB(sb); - - sb->s_magic = NFS_SUPER_MAGIC; - - server->io_stats = nfs_alloc_iostats(); - if (server->io_stats == NULL) - return -ENOMEM; + if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); + return -EINVAL; + } - root_inode = nfs_get_root(sb, &server->fh, &fsinfo); - /* Did getting the root inode fail? */ - if (IS_ERR(root_inode)) { - no_root_error = PTR_ERR(root_inode); - goto out_no_root; + switch (data->version) { + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: mount structure version %d does not support NFSv3\n", + __FUNCTION__, + data->version); + return -EINVAL; + } + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) { + dprintk("%s: mount structure version %d does not support strong security\n", + __FUNCTION__, + data->version); + return -EINVAL; + } + /* Fill in pseudoflavor for mount version < 5 */ + data->pseudoflavor = RPC_AUTH_UNIX; + case 5: + memset(data->context, 0, sizeof(data->context)); } - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) { - no_root_error = -ENOMEM; - goto out_no_root; + +#ifndef CONFIG_NFS_V3 + /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); + return -EPROTONOSUPPORT; } - sb->s_root->d_op = server->rpc_ops->dentry_ops; - - /* mount time stamp, in seconds */ - server->mount_time = jiffies; - - /* Get some general file system info */ - if (server->namelen == 0 && - server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) - server->namelen = pathinfo.max_namelen; - /* Work out a lot of parameters */ - if (server->rsize == 0) - server->rsize = nfs_block_size(fsinfo.rtpref, NULL); - if (server->wsize == 0) - server->wsize = nfs_block_size(fsinfo.wtpref, NULL); - - if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax) - server->rsize = nfs_block_size(fsinfo.rtmax, NULL); - if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax) - server->wsize = nfs_block_size(fsinfo.wtmax, NULL); - - max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); - 
if (server->rsize > max_rpc_payload) - server->rsize = max_rpc_payload; - if (server->rsize > NFS_MAX_FILE_IO_SIZE) - server->rsize = NFS_MAX_FILE_IO_SIZE; - server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - if (server->wsize > max_rpc_payload) - server->wsize = max_rpc_payload; - if (server->wsize > NFS_MAX_FILE_IO_SIZE) - server->wsize = NFS_MAX_FILE_IO_SIZE; - server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; +#endif /* CONFIG_NFS_V3 */ - if (sb->s_blocksize == 0) - sb->s_blocksize = nfs_block_bits(server->wsize, - &sb->s_blocksize_bits); - server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL); - - server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); - if (server->dtsize > PAGE_CACHE_SIZE) - server->dtsize = PAGE_CACHE_SIZE; - if (server->dtsize > server->rsize) - server->dtsize = server->rsize; - - if (server->flags & NFS_MOUNT_NOAC) { - server->acregmin = server->acregmax = 0; - server->acdirmin = server->acdirmax = 0; - sb->s_flags |= MS_SYNCHRONOUS; + /* We now require that the mount process passes the remote address */ + if (data->addr.sin_addr.s_addr == INADDR_ANY) { + dprintk("%s: mount program didn't pass remote address!\n", + __FUNCTION__); + return -EINVAL; } - server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; - nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); + /* Prepare the root filehandle */ + if (data->flags & NFS_MOUNT_VER3) + mntfh->size = data->root.size; + else + mntfh->size = NFS2_FHSIZE; - server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; - server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; + if (mntfh->size > sizeof(mntfh->data)) { + dprintk("%s: invalid root filehandle\n", __FUNCTION__); + return -EINVAL; + } + + memcpy(mntfh->data, data->root.data, mntfh->size); + if (mntfh->size < sizeof(mntfh->data)) + memset(mntfh->data + mntfh->size, 0, + sizeof(mntfh->data) - mntfh->size); - /* We're airborne Set socket buffersize */ - rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); return 0; - /* Yargs. It didn't work out. */ -out_no_root: - dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error); - if (!IS_ERR(root_inode)) - iput(root_inode); - return no_root_error; } /* - * Initialise the timeout values for a connection + * Initialise the common bits of the superblock */ -static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) +static inline void nfs_initialise_sb(struct super_block *sb) { - to->to_initval = timeo * HZ / 10; - to->to_retries = retrans; - if (!to->to_retries) - to->to_retries = 2; - - switch (proto) { - case IPPROTO_TCP: - if (!to->to_initval) - to->to_initval = 60 * HZ; - if (to->to_initval > NFS_MAX_TCP_TIMEOUT) - to->to_initval = NFS_MAX_TCP_TIMEOUT; - to->to_increment = to->to_initval; - to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); - to->to_exponential = 0; - break; - case IPPROTO_UDP: - default: - if (!to->to_initval) - to->to_initval = 11 * HZ / 10; - if (to->to_initval > NFS_MAX_UDP_TIMEOUT) - to->to_initval = NFS_MAX_UDP_TIMEOUT; - to->to_maxval = NFS_MAX_UDP_TIMEOUT; - to->to_exponential = 1; - break; - } -} + struct nfs_server *server = NFS_SB(sb); -/* - * Create an RPC client handle. 
- */ -static struct rpc_clnt * -nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) -{ - struct rpc_timeout timeparms; - struct rpc_xprt *xprt = NULL; - struct rpc_clnt *clnt = NULL; - int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; - - nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); - - server->retrans_timeo = timeparms.to_initval; - server->retrans_count = timeparms.to_retries; - - /* create transport and client */ - xprt = xprt_create_proto(proto, &server->addr, &timeparms); - if (IS_ERR(xprt)) { - dprintk("%s: cannot create RPC transport. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - return (struct rpc_clnt *)xprt; - } - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, data->pseudoflavor); - if (IS_ERR(clnt)) { - dprintk("%s: cannot create RPC client. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - goto out_fail; - } + sb->s_magic = NFS_SUPER_MAGIC; - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; + /* We probably want something more informative here */ + snprintf(sb->s_id, sizeof(sb->s_id), + "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - return clnt; + if (sb->s_blocksize == 0) + sb->s_blocksize = nfs_block_bits(server->wsize, + &sb->s_blocksize_bits); + + if (server->flags & NFS_MOUNT_NOAC) + sb->s_flags |= MS_SYNCHRONOUS; -out_fail: - return clnt; + nfs_super_set_maxbytes(sb, server->maxfilesize); } /* - * Clone a server record + * Finish setting up an NFS2/3 superblock */ -static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data) +static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data) { struct nfs_server *server = NFS_SB(sb); - struct nfs_server *parent = NFS_SB(data->sb); - struct inode *root_inode; - struct nfs_fsinfo fsinfo; - void *err = ERR_PTR(-ENOMEM); - - sb->s_op = data->sb->s_op; - sb->s_blocksize = data->sb->s_blocksize; - sb->s_blocksize_bits = data->sb->s_blocksize_bits; - sb->s_maxbytes = data->sb->s_maxbytes; - - server->client_sys = server->client_acl = ERR_PTR(-EINVAL); - server->io_stats = nfs_alloc_iostats(); - if (server->io_stats == NULL) - goto out; - - server->client = rpc_clone_client(parent->client); - if (IS_ERR((err = server->client))) - goto out; - - if (!IS_ERR(parent->client_sys)) { - server->client_sys = rpc_clone_client(parent->client_sys); - if (IS_ERR((err = server->client_sys))) - goto out; - } - if (!IS_ERR(parent->client_acl)) { - server->client_acl = rpc_clone_client(parent->client_acl); - if (IS_ERR((err = server->client_acl))) - goto out; - } - root_inode = nfs_fhget(sb, data->fh, data->fattr); - if (!root_inode) - goto out; - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) - goto out_put_root; - fsinfo.fattr = data->fattr; - if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) - nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); - sb->s_root->d_op = server->rpc_ops->dentry_ops; - sb->s_flags |= MS_ACTIVE; - return server; -out_put_root: - iput(root_inode); -out: - return err; -} -/* - * Copy an existing superblock and attach revised data - */ -static int nfs_clone_generic_sb(struct nfs_clone_mount *data, - struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *), - struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *), - struct vfsmount *mnt) -{ - struct nfs_server *server; - struct nfs_server *parent = NFS_SB(data->sb); - struct super_block *sb = ERR_PTR(-EINVAL); - char *hostname; - int 
error = -ENOMEM; - int len; - - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (server == NULL) - goto out_err; - memcpy(server, parent, sizeof(*server)); - hostname = (data->hostname != NULL) ? data->hostname : parent->hostname; - len = strlen(hostname) + 1; - server->hostname = kmalloc(len, GFP_KERNEL); - if (server->hostname == NULL) - goto free_server; - memcpy(server->hostname, hostname, len); - error = rpciod_up(); - if (error != 0) - goto free_hostname; - - sb = fill_sb(server, data); - if (IS_ERR(sb)) { - error = PTR_ERR(sb); - goto kill_rpciod; - } - - if (sb->s_root) - goto out_rpciod_down; + sb->s_blocksize_bits = 0; + sb->s_blocksize = 0; + if (data->bsize) + sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - server = fill_server(sb, data); - if (IS_ERR(server)) { - error = PTR_ERR(server); - goto out_deactivate; + if (server->flags & NFS_MOUNT_VER3) { + /* The VFS shouldn't apply the umask to mode bits. We will do + * so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; + sb->s_time_gran = 1; } - return simple_set_mnt(mnt, sb); -out_deactivate: - up_write(&sb->s_umount); - deactivate_super(sb); - return error; -out_rpciod_down: - rpciod_down(); - kfree(server->hostname); - kfree(server); - return simple_set_mnt(mnt, sb); -kill_rpciod: - rpciod_down(); -free_hostname: - kfree(server->hostname); -free_server: - kfree(server); -out_err: - return error; + + sb->s_op = &nfs_sops; + nfs_initialise_sb(sb); } /* - * Set up an NFS2/3 superblock - * - * The way this works is that the mount process passes a structure - * in the data argument which contains the server's IP address - * and the root file handle obtained from the server's mount - * daemon. We stash these away in the private superblock fields. + * Finish setting up a cloned NFS2/3 superblock */ -static int -nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) +static void nfs_clone_super(struct super_block *sb, + const struct super_block *old_sb) { - struct nfs_server *server; - rpc_authflavor_t authflavor; + struct nfs_server *server = NFS_SB(sb); - server = NFS_SB(sb); - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - if (data->bsize) - sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; - - /* Start lockd here, before we might error out */ - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_up(); - - server->namelen = data->namlen; - server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL); - if (!server->hostname) - return -ENOMEM; - strcpy(server->hostname, data->hostname); - - /* Check NFS protocol revision and initialize RPC op vector - * and file handle pool. 
*/ -#ifdef CONFIG_NFS_V3 - if (server->flags & NFS_MOUNT_VER3) { - server->rpc_ops = &nfs_v3_clientops; - server->caps |= NFS_CAP_READDIRPLUS; - } else { - server->rpc_ops = &nfs_v2_clientops; - } -#else - server->rpc_ops = &nfs_v2_clientops; -#endif + sb->s_blocksize_bits = old_sb->s_blocksize_bits; + sb->s_blocksize = old_sb->s_blocksize; + sb->s_maxbytes = old_sb->s_maxbytes; - /* Fill in pseudoflavor for mount version < 5 */ - if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) - data->pseudoflavor = RPC_AUTH_UNIX; - authflavor = data->pseudoflavor; /* save for sb_init() */ - /* XXX maybe we want to add a server->pseudoflavor field */ - - /* Create RPC client handles */ - server->client = nfs_create_client(server, data); - if (IS_ERR(server->client)) - return PTR_ERR(server->client); - /* RFC 2623, sec 2.3.2 */ - if (authflavor != RPC_AUTH_UNIX) { - struct rpc_auth *auth; - - server->client_sys = rpc_clone_client(server->client); - if (IS_ERR(server->client_sys)) - return PTR_ERR(server->client_sys); - auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); - if (IS_ERR(auth)) - return PTR_ERR(auth); - } else { - atomic_inc(&server->client->cl_count); - server->client_sys = server->client; - } if (server->flags & NFS_MOUNT_VER3) { -#ifdef CONFIG_NFS_V3_ACL - if (!(server->flags & NFS_MOUNT_NOACL)) { - server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); - /* No errors! Assume that Sun nfsacls are supported */ - if (!IS_ERR(server->client_acl)) - server->caps |= NFS_CAP_ACLS; - } -#else - server->flags &= ~NFS_MOUNT_NOACL; -#endif /* CONFIG_NFS_V3_ACL */ - /* - * The VFS shouldn't apply the umask to mode bits. We will - * do so ourselves when necessary. + /* The VFS shouldn't apply the umask to mode bits. We will do + * so ourselves when necessary. 
*/ sb->s_flags |= MS_POSIXACL; - if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) - server->namelen = NFS3_MAXNAMLEN; sb->s_time_gran = 1; - } else { - if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) - server->namelen = NFS2_MAXNAMLEN; } - sb->s_op = &nfs_sops; - return nfs_sb_init(sb, authflavor); + sb->s_op = old_sb->s_op; + nfs_initialise_sb(sb); } -static int nfs_set_super(struct super_block *s, void *data) +static int nfs_set_super(struct super_block *s, void *_server) { - s->s_fs_info = data; - return set_anon_super(s, data); + struct nfs_server *server = _server; + int ret; + + s->s_fs_info = server; + ret = set_anon_super(s, server); + if (ret == 0) + server->s_dev = s->s_dev; + return ret; } static int nfs_compare_super(struct super_block *sb, void *data) { - struct nfs_server *server = data; - struct nfs_server *old = NFS_SB(sb); + struct nfs_server *server = data, *old = NFS_SB(sb); - if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr) + if (old->nfs_client != server->nfs_client) return 0; - if (old->addr.sin_port != server->addr.sin_port) + if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) return 0; - return !nfs_compare_fh(&old->fh, &server->fh); + return 1; } static int nfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - int error; struct nfs_server *server = NULL; struct super_block *s; - struct nfs_fh *root; + struct nfs_fh mntfh; struct nfs_mount_data *data = raw_data; + struct dentry *mntroot; + int error; - error = -EINVAL; - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - goto out_err_noserver; - } - if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - goto out_err_noserver; - } - switch (data->version) { - case 1: - data->namlen = 0; - case 2: - data->bsize = 0; - case 3: - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: mount structure version %d does not support NFSv3\n", - __FUNCTION__, - data->version); - goto out_err_noserver; - } - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - case 4: - if (data->flags & NFS_MOUNT_SECFLAVOUR) { - dprintk("%s: mount structure version %d does not support strong security\n", - __FUNCTION__, - data->version); - goto out_err_noserver; - } - case 5: - memset(data->context, 0, sizeof(data->context)); - } -#ifndef CONFIG_NFS_V3 - /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ - error = -EPROTONOSUPPORT; - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); - goto out_err_noserver; - } -#endif /* CONFIG_NFS_V3 */ + /* Validate the mount data */ + error = nfs_validate_mount_data(data, &mntfh); + if (error < 0) + return error; - error = -ENOMEM; - server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (!server) + /* Get a volume representation */ + server = nfs_create_server(data, &mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); goto out_err_noserver; - /* Zero out the NFS state stuff */ - init_nfsv4_state(server); - server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); - - root = &server->fh; - if (data->flags & NFS_MOUNT_VER3) - root->size = data->root.size; - else - root->size = NFS2_FHSIZE; - error = -EINVAL; - if (root->size > sizeof(root->data)) { - dprintk("%s: invalid root filehandle\n", __FUNCTION__); - goto out_err; - } - memcpy(root->data, data->root.data, root->size); - 
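The rewritten nfs_get_sb() here, and the xdev and NFSv4 variants that follow it, all share one outline; what differs is only how the nfs_server record is obtained and which fill/clone helper initialises a brand-new superblock. Condensed, with the error paths and the server = NULL reset omitted (they are exactly as in the hunks themselves):

	server = nfs_create_server(data, &mntfh);	/* volume representation (nfs_clone_server()
							 * or nfs4_create_server() in the variants) */
	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
	if (s->s_fs_info != server)
		nfs_free_server(server);		/* sget() matched an existing sb: same nfs_client,
							 * same FSID, so the volume is shared */
	if (!s->s_root) {
		s->s_flags = flags;			/* first mount of this volume: set the sb up */
		nfs_fill_super(s, data);
	}
	mntroot = nfs_get_root(s, &mntfh);		/* splice a root for this particular export in */
	s->s_flags |= MS_ACTIVE;
	mnt->mnt_sb = s;
	mnt->mnt_root = mntroot;
	return 0;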
- /* We now require that the mount process passes the remote address */ - memcpy(&server->addr, &data->addr, sizeof(server->addr)); - if (server->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote address!\n", - __FUNCTION__); - goto out_err; - } - - /* Fire up rpciod if not yet running */ - error = rpciod_up(); - if (error < 0) { - dprintk("%s: couldn't start rpciod! Error = %d\n", - __FUNCTION__, error); - goto out_err; } + /* Get a superblock - note that we may end up sharing one that already exists */ s = sget(fs_type, nfs_compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); - goto out_err_rpciod; + goto out_err_nosb; } - if (s->s_root) - goto out_rpciod_down; + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } - s->s_flags = flags; + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs_fill_super(s, data); + } - error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); - if (error) { - up_write(&s->s_umount); - deactivate_super(s); - return error; + mntroot = nfs_get_root(s, &mntfh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; } - s->s_flags |= MS_ACTIVE; - return simple_set_mnt(mnt, s); -out_rpciod_down: - rpciod_down(); - kfree(server); - return simple_set_mnt(mnt, s); + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + return 0; -out_err_rpciod: - rpciod_down(); -out_err: - kfree(server); +out_err_nosb: + nfs_free_server(server); out_err_noserver: return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + return error; } +/* + * Destroy an NFS2/3 superblock + */ static void nfs_kill_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); kill_anon_super(s); - - if (!IS_ERR(server->client)) - rpc_shutdown_client(server->client); - if (!IS_ERR(server->client_sys)) - rpc_shutdown_client(server->client_sys); - if (!IS_ERR(server->client_acl)) - rpc_shutdown_client(server->client_acl); - - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_down(); /* release rpc.lockd */ - - rpciod_down(); /* release rpciod */ - - nfs_free_iostats(server->io_stats); - kfree(server->hostname); - kfree(server); - nfs_release_automount_timer(); + nfs_free_server(server); } -static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - struct super_block *sb; - - server->fsid = data->fattr->fsid; - nfs_copy_fh(&server->fh, data->fh); - sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); - if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM)) - lockd_up(); - return sb; -} - -static int nfs_clone_nfs_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +/* + * Clone an NFS2/3 server record on xdev traversal (FSID-change) + */ +static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) { struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server, mnt); -} + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + int error; -#ifdef CONFIG_NFS_V4 -static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, - struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor) -{ - struct nfs4_client *clp; - struct rpc_xprt *xprt = NULL; - struct rpc_clnt *clnt = NULL; - int err = -EIO; - - clp = 
nfs4_get_client(&server->addr.sin_addr); - if (!clp) { - dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); - return ERR_PTR(err); - } + dprintk("--> nfs_xdev_get_sb()\n"); - /* Now create transport and client */ - down_write(&clp->cl_sem); - if (IS_ERR(clp->cl_rpcclient)) { - xprt = xprt_create_proto(proto, &server->addr, timeparms); - if (IS_ERR(xprt)) { - up_write(&clp->cl_sem); - err = PTR_ERR(xprt); - dprintk("%s: cannot create RPC transport. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } - /* Bind to a reserved port! */ - xprt->resvport = 1; - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, flavor); - if (IS_ERR(clnt)) { - up_write(&clp->cl_sem); - err = PTR_ERR(clnt); - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; - clp->cl_rpcclient = clnt; - memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); - nfs_idmap_new(clp); - } - list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); - clnt = rpc_clone_client(clp->cl_rpcclient); - if (!IS_ERR(clnt)) - server->nfs4_state = clp; - up_write(&clp->cl_sem); - clp = NULL; - - if (IS_ERR(clnt)) { - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - return clnt; + /* create a new volume representation */ + server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; } - if (server->nfs4_state->cl_idmap == NULL) { - dprintk("%s: failed to create idmapper.\n", __FUNCTION__); - return ERR_PTR(-ENOMEM); + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; } - if (clnt->cl_auth->au_flavor != flavor) { - struct rpc_auth *auth; - - auth = rpcauth_create(flavor, clnt); - if (IS_ERR(auth)) { - dprintk("%s: couldn't create credcache!\n", __FUNCTION__); - return (struct rpc_clnt *)auth; - } + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; } - return clnt; - out_fail: - if (clp) - nfs4_put_client(clp); - return ERR_PTR(err); -} - -/* - * Set up an NFS4 superblock - */ -static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) -{ - struct nfs_server *server; - struct rpc_timeout timeparms; - rpc_authflavor_t authflavour; - int err = -EIO; - - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - server = NFS_SB(sb); - if (data->rsize != 0) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize != 0) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - server->caps = NFS_CAP_ATOMIC_OPEN; + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs_clone_super(s, data->sb); + } - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; + mntroot = nfs_get_root(s, data->fh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } - server->rpc_ops = &nfs_v4_clientops; + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; - nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); + dprintk("<-- nfs_xdev_get_sb() = 0\n"); + return 0; - server->retrans_timeo = timeparms.to_initval; - server->retrans_count 
= timeparms.to_retries; +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error); + return error; - /* Now create transport and client */ - authflavour = RPC_AUTH_UNIX; - if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen != 1) { - dprintk("%s: Invalid number of RPC auth flavours %d.\n", - __FUNCTION__, data->auth_flavourlen); - err = -EINVAL; - goto out_fail; - } - if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { - err = -EFAULT; - goto out_fail; - } - } +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error); + return error; +} - server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour); - if (IS_ERR(server->client)) { - err = PTR_ERR(server->client); - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } +#ifdef CONFIG_NFS_V4 +/* + * Finish setting up a cloned NFS4 superblock + */ +static void nfs4_clone_super(struct super_block *sb, + const struct super_block *old_sb) +{ + sb->s_blocksize_bits = old_sb->s_blocksize_bits; + sb->s_blocksize = old_sb->s_blocksize; + sb->s_maxbytes = old_sb->s_maxbytes; sb->s_time_gran = 1; - - sb->s_op = &nfs4_sops; - err = nfs_sb_init(sb, authflavour); - - out_fail: - return err; + sb->s_op = old_sb->s_op; + nfs_initialise_sb(sb); } -static int nfs4_compare_super(struct super_block *sb, void *data) +/* + * Set up an NFS4 superblock + */ +static void nfs4_fill_super(struct super_block *sb) { - struct nfs_server *server = data; - struct nfs_server *old = NFS_SB(sb); - - if (strcmp(server->hostname, old->hostname) != 0) - return 0; - if (strcmp(server->mnt_path, old->mnt_path) != 0) - return 0; - return 1; + sb->s_time_gran = 1; + sb->s_op = &nfs4_sops; + nfs_initialise_sb(sb); } -static void * -nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) +static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) { void *p = NULL; @@ -1297,14 +787,22 @@ nfs_copy_user_string(char *dst, struct n return dst; } +/* + * Get the superblock for an NFS4 mountpoint + */ static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - int error; - struct nfs_server *server; - struct super_block *s; struct nfs4_mount_data *data = raw_data; + struct super_block *s; + struct nfs_server *server; + struct sockaddr_in addr; + rpc_authflavor_t authflavour; + struct nfs_fh mntfh; + struct dentry *mntroot; + char *mntpath = NULL, *hostname = NULL, ip_addr[16]; void *p; + int error; if (data == NULL) { dprintk("%s: missing data argument\n", __FUNCTION__); @@ -1315,84 +813,107 @@ static int nfs4_get_sb(struct file_syste return -EINVAL; } - server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (!server) - return -ENOMEM; - /* Zero out the NFS state stuff */ - init_nfsv4_state(server); - server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + /* We now require that the mount process passes the remote address */ + if (data->host_addrlen != sizeof(addr)) + return -EINVAL; + + if (copy_from_user(&addr, data->host_addr, sizeof(addr))) + return -EFAULT; + + if (addr.sin_family != AF_INET || + addr.sin_addr.s_addr == INADDR_ANY + ) { + dprintk("%s: mount program didn't pass remote IP address!\n", + __FUNCTION__); + return -EINVAL; + } + + /* Grab the authentication type */ + authflavour = RPC_AUTH_UNIX; + if 
(data->auth_flavourlen != 0) { + if (data->auth_flavourlen != 1) { + dprintk("%s: Invalid number of RPC auth flavours %d.\n", + __FUNCTION__, data->auth_flavourlen); + error = -EINVAL; + goto out_err_noserver; + } + + if (copy_from_user(&authflavour, data->auth_flavours, + sizeof(authflavour))) { + error = -EFAULT; + goto out_err_noserver; + } + } p = nfs_copy_user_string(NULL, &data->hostname, 256); if (IS_ERR(p)) goto out_err; - server->hostname = p; + hostname = p; p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); if (IS_ERR(p)) goto out_err; - server->mnt_path = p; + mntpath = p; + + dprintk("MNTPATH: %s\n", mntpath); - p = nfs_copy_user_string(server->ip_addr, &data->client_addr, - sizeof(server->ip_addr) - 1); + p = nfs_copy_user_string(ip_addr, &data->client_addr, + sizeof(ip_addr) - 1); if (IS_ERR(p)) goto out_err; - /* We now require that the mount process passes the remote address */ - if (data->host_addrlen != sizeof(server->addr)) { - error = -EINVAL; - goto out_free; - } - if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) { - error = -EFAULT; - goto out_free; - } - if (server->addr.sin_family != AF_INET || - server->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote IP address!\n", - __FUNCTION__); - error = -EINVAL; - goto out_free; - } - - /* Fire up rpciod if not yet running */ - error = rpciod_up(); - if (error < 0) { - dprintk("%s: couldn't start rpciod! Error = %d\n", - __FUNCTION__, error); - goto out_free; + /* Get a volume representation */ + server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, + authflavour, &mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; } - s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); - + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(fs_type, nfs_compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_free; } - if (s->s_root) { - kfree(server->mnt_path); - kfree(server->hostname); - kfree(server); - return simple_set_mnt(mnt, s); - } + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; - s->s_flags = flags; + nfs4_fill_super(s); + } else { + nfs_free_server(server); + } - error = nfs4_fill_super(s, data, flags & MS_SILENT ? 
1 : 0); - if (error) { - up_write(&s->s_umount); - deactivate_super(s); - return error; + mntroot = nfs4_get_root(s, &mntfh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; } + s->s_flags |= MS_ACTIVE; - return simple_set_mnt(mnt, s); + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + kfree(mntpath); + kfree(hostname); + return 0; + out_err: error = PTR_ERR(p); + goto out_err_noserver; + out_free: - kfree(server->mnt_path); - kfree(server->hostname); - kfree(server); + nfs_free_server(server); +out_err_noserver: + kfree(mntpath); + kfree(hostname); return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + goto out_err_noserver; } static void nfs4_kill_super(struct super_block *sb) @@ -1403,135 +924,140 @@ static void nfs4_kill_super(struct super kill_anon_super(sb); nfs4_renewd_prepare_shutdown(server); + nfs_free_server(server); +} + +/* + * Clone an NFS4 server record on xdev traversal (FSID-change) + */ +static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) +{ + struct nfs_clone_mount *data = raw_data; + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + int error; + + dprintk("--> nfs4_xdev_get_sb()\n"); + + /* create a new volume representation */ + server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; + } + + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; + } - if (server->client != NULL && !IS_ERR(server->client)) - rpc_shutdown_client(server->client); + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } - destroy_nfsv4_state(server); + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs4_clone_super(s, data->sb); + } + + mntroot = nfs4_get_root(s, data->fh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } - rpciod_down(); + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + + dprintk("<-- nfs4_xdev_get_sb() = 0\n"); + return 0; + +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error); + return error; - nfs_free_iostats(server->io_stats); - kfree(server->hostname); - kfree(server); - nfs_release_automount_timer(); +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error); + return error; } /* - * Constructs the SERVER-side path + * Create an NFS4 server record on referral traversal */ -static inline char *nfs4_dup_path(const struct dentry *dentry) +static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) { - char *page = (char *) __get_free_page(GFP_USER); - char *path; + struct nfs_clone_mount *data = raw_data; + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + struct nfs_fh mntfh; + int error; - path = nfs4_path(dentry, page, PAGE_SIZE); - if (!IS_ERR(path)) { - int len = PAGE_SIZE + page - path; - char *tmp = path; + dprintk("--> nfs4_referral_get_sb()\n"); - path = kmalloc(len, GFP_KERNEL); - if (path) - memcpy(path, tmp, len); - else - path = ERR_PTR(-ENOMEM); + /* create a new volume representation */ 
+ server = nfs4_create_referral_server(data, &mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; } - free_page((unsigned long)page); - return path; -} -static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - const struct dentry *dentry = data->dentry; - struct nfs4_client *clp = server->nfs4_state; - struct super_block *sb; - - server->fsid = data->fattr->fsid; - nfs_copy_fh(&server->fh, data->fh); - server->mnt_path = nfs4_dup_path(dentry); - if (IS_ERR(server->mnt_path)) { - sb = (struct super_block *)server->mnt_path; - goto err; + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; } - sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); - if (IS_ERR(sb) || sb->s_root) - goto free_path; - nfs4_server_capabilities(server, &server->fh); - - down_write(&clp->cl_sem); - atomic_inc(&clp->cl_count); - list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); - up_write(&clp->cl_sem); - return sb; -free_path: - kfree(server->mnt_path); -err: - server->mnt_path = NULL; - return sb; -} -static int nfs_clone_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) -{ - struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server, mnt); -} + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } -static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - struct super_block *sb = ERR_PTR(-ENOMEM); - int len; - - len = strlen(data->mnt_path) + 1; - server->mnt_path = kmalloc(len, GFP_KERNEL); - if (server->mnt_path == NULL) - goto err; - memcpy(server->mnt_path, data->mnt_path, len); - memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in)); - - sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); - if (IS_ERR(sb) || sb->s_root) - goto free_path; - return sb; -free_path: - kfree(server->mnt_path); -err: - server->mnt_path = NULL; - return sb; -} + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs4_fill_super(s); + } -static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data) -{ - struct nfs_server *server = NFS_SB(sb); - struct rpc_timeout timeparms; - int proto, timeo, retrans; - void *err; - - proto = IPPROTO_TCP; - /* Since we are following a referral and there may be alternatives, - set the timeouts and retries to low values */ - timeo = 2; - retrans = 1; - nfs_init_timeout_values(&timeparms, proto, timeo, retrans); - - server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor); - if (IS_ERR((err = server->client))) - goto out_err; + mntroot = nfs4_get_root(s, data->fh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } - sb->s_time_gran = 1; - sb->s_op = &nfs4_sops; - err = ERR_PTR(nfs_sb_init(sb, data->authflavor)); - if (!IS_ERR(err)) - return server; -out_err: - return (struct nfs_server *)err; -} + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; -static int nfs_referral_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) -{ - struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs4_referral_sb, 
nfs4_referral_server, mnt); + dprintk("<-- nfs4_referral_get_sb() = 0\n"); + return 0; + +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error); + return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); + return error; } -#endif +#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 2fe3403..7a25a6d 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -14,6 +14,7 @@ #include #include #include "callback.h" +#include "internal.h" static const int nfs_set_port_min = 0; static const int nfs_set_port_max = 65535; @@ -55,6 +56,48 @@ #endif .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies, }, +#ifdef CONFIG_NFS_FSCACHE + { + .ctl_name = CTL_UNNUMBERED, + .procname = "fscache_from_error", + .data = &nfs_fscache_from_error, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "fscache_to_error", + .data = &nfs_fscache_to_error, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "fscache_uncache_page", + .data = &nfs_fscache_uncache_page, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "fscache_to_pages", + .data = &nfs_fscache_to_pages, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "fscache_from_pages", + .data = &nfs_fscache_from_pages, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif { .ctl_name = 0 } }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 86bac6a..cd8d972 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -63,6 +63,7 @@ #include #include "delegation.h" #include "iostat.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -163,6 +164,9 @@ static void nfs_grow_file(struct page *p return; nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); i_size_write(inode, end); +#ifdef FSCACHE_WRITE_SUPPORT + nfs_set_fscsize(NFS_SERVER(inode), NFS_I(inode), end); +#endif } /* We can set the PG_uptodate flag if we see that a write request @@ -342,6 +346,9 @@ do_it: err = -EBADF; goto out; } + + nfs_writepage_to_fscache(inode, page); + lock_kernel(); if (!IS_SYNC(inode) && inode_referenced) { err = nfs_writepage_async(ctx, inode, page, 0, offset); @@ -424,7 +431,7 @@ static int nfs_inode_add_request(struct if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } - SetPagePrivate(req->wb_page); + SetPageNfsWriting(req->wb_page); nfsi->npages++; atomic_inc(&req->wb_count); return 0; @@ -441,7 +448,7 @@ static void nfs_inode_remove_request(str BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&nfsi->req_lock); - ClearPagePrivate(req->wb_page); + ClearPageNfsWriting(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; if (!nfsi->npages) { @@ -1280,7 +1287,7 @@ #if defined(CONFIG_NFS_V3) || defined(CO if (time_before(complain, jiffies)) { dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", - NFS_SERVER(data->inode)->hostname, + NFS_SERVER(data->inode)->nfs_client->cl_hostname, resp->verf->committed, argp->stable); complain = jiffies + 300 * HZ; } diff --git a/fs/open.c b/fs/open.c index 303f06d..3cf6c00 100644 --- a/fs/open.c +++ b/fs/open.c @@ -974,6 +974,26 @@ struct file 
*dentry_open(struct dentry * EXPORT_SYMBOL(dentry_open); /* + * open a specifically in-kernel file + */ +struct file *dentry_open_kernel(struct dentry *dentry, struct vfsmount *mnt, int flags) +{ + int error; + struct file *f; + + error = -ENFILE; + f = get_empty_kernel_filp(FKFLAGS_NO_ENFILE); + if (f == NULL) { + dput(dentry); + mntput(mnt); + return ERR_PTR(error); + } + + return __dentry_open(dentry, mnt, flags, f, NULL); +} +EXPORT_SYMBOL_GPL(dentry_open_kernel); + +/* * Find an empty file descriptor entry, and mark it busy. */ int get_unused_fd(void) diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 12dfdcf..df64f04 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2351,7 +2351,7 @@ static int reiserfs_write_full_page(stru struct buffer_head *head, *bh; int partial = 0; int nr = 0; - int checked = PageChecked(page); + int checked = PageFsMisc(page); struct reiserfs_transaction_handle th; struct super_block *s = inode->i_sb; int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; @@ -2420,7 +2420,7 @@ static int reiserfs_write_full_page(stru * blocks we're going to log */ if (checked) { - ClearPageChecked(page); + ClearPageFsMisc(page); reiserfs_write_lock(s); error = journal_begin(&th, s, bh_per_page + 1); if (error) { @@ -2801,7 +2801,7 @@ static void reiserfs_invalidatepage(stru BUG_ON(!PageLocked(page)); if (offset == 0) - ClearPageChecked(page); + ClearPageFsMisc(page); if (!page_has_buffers(page)) goto out; @@ -2842,7 +2842,7 @@ static int reiserfs_set_page_dirty(struc { struct inode *inode = page->mapping->host; if (reiserfs_file_data_log(inode)) { - SetPageChecked(page); + SetPageFsMisc(page); return __set_page_dirty_nobuffers(page); } return __set_page_dirty_buffers(page); @@ -2865,7 +2865,7 @@ static int reiserfs_releasepage(struct p struct buffer_head *bh; int ret = 1; - WARN_ON(PageChecked(page)); + WARN_ON(PageFsMisc(page)); spin_lock(&j->j_dirty_buffers_lock); head = page_buffers(page); bh = head; diff --git a/fs/super.c b/fs/super.c index 6d4e817..3bf8e5f 100644 --- a/fs/super.c +++ b/fs/super.c @@ -228,17 +228,17 @@ static int grab_super(struct super_block * that need destruction out of superblock, call generic_shutdown_super() * and release aforementioned objects. Note: dentries and inodes _are_ * taken care of and do not need specific handling. + * + * Upon calling this function, the filesystem may no longer alter or + * rearrange the set of dentries belonging to this super_block, nor may it + * change the attachments of dentries to inodes. 
*/ void generic_shutdown_super(struct super_block *sb) { - struct dentry *root = sb->s_root; struct super_operations *sop = sb->s_op; - if (root) { - sb->s_root = NULL; - shrink_dcache_parent(root); - shrink_dcache_sb(sb); - dput(root); + if (sb->s_root) { + shrink_dcache_for_umount(sb); fsync_super(sb); lock_super(sb); sb->s_flags &= ~MS_ACTIVE; diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 7f0a0aa..e04327c 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -135,7 +135,7 @@ static void ufs_check_page(struct page * if (offs != limit) goto Eend; out: - SetPageChecked(page); + SetPageFsMisc(page); return; /* Too bad, we had an error */ @@ -173,7 +173,7 @@ Eend: "offset=%lu", dir->i_ino, (page->index< + +#define NR_MAXCACHES BITS_PER_LONG + +struct fscache_cache; +struct fscache_cache_ops; +struct fscache_object; + +/* + * cache tag definition + */ +struct fscache_cache_tag { + struct list_head link; + struct fscache_cache *cache; /* cache referred to by this tag */ + atomic_t usage; + char name[0]; /* tag name */ +}; + +/* + * cache definition + */ +struct fscache_cache { + struct fscache_cache_ops *ops; + struct fscache_cache_tag *tag; /* tag representing this cache */ + struct list_head link; /* link in list of caches */ + struct rw_semaphore withdrawal_sem; /* withdrawal control sem */ + size_t max_index_size; /* maximum size of index data */ + char identifier[32]; /* cache label */ + + /* node management */ + struct list_head object_list; /* list of data/index objects */ + spinlock_t object_list_lock; + struct fscache_object *fsdef; /* object for the fsdef index */ + unsigned long flags; +#define FSCACHE_IOERROR 0 /* cache stopped on I/O error */ +}; + +extern void fscache_init_cache(struct fscache_cache *cache, + struct fscache_cache_ops *ops, + const char *idfmt, + ...) 
__attribute__ ((format (printf,3,4))); + +extern int fscache_add_cache(struct fscache_cache *cache, + struct fscache_object *fsdef, + const char *tagname); +extern void fscache_withdraw_cache(struct fscache_cache *cache); + +extern void fscache_io_error(struct fscache_cache *cache); + +/*****************************************************************************/ +/* + * cache operations + */ +struct fscache_cache_ops { + /* name of cache provider */ + const char *name; + + /* look up the object for a cookie, creating it on disc if necessary */ + struct fscache_object *(*lookup_object)(struct fscache_cache *cache, + struct fscache_object *parent, + struct fscache_cookie *cookie); + + /* increment the usage count on this object (may fail if unmounting) */ + struct fscache_object *(*grab_object)(struct fscache_object *object); + + /* lock a semaphore on an object */ + void (*lock_object)(struct fscache_object *object); + + /* unlock a semaphore on an object */ + void (*unlock_object)(struct fscache_object *object); + + /* pin an object in the cache */ + int (*pin_object)(struct fscache_object *object); + + /* unpin an object in the cache */ + void (*unpin_object)(struct fscache_object *object); + + /* store the updated auxilliary data on an object */ + void (*update_object)(struct fscache_object *object); + + /* dispose of a reference to an object */ + void (*put_object)(struct fscache_object *object); + + /* sync a cache */ + void (*sync_cache)(struct fscache_cache *cache); + + /* set the data size of an object */ + int (*set_i_size)(struct fscache_object *object, loff_t i_size); + + /* reserve space for an object's data and associated metadata */ + int (*reserve_space)(struct fscache_object *object, loff_t i_size); + + /* request a backing block for a page be read or allocated in the + * cache */ + int (*read_or_alloc_page)(struct fscache_object *object, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + unsigned long gfp); + + /* request backing blocks for a list of pages be read or allocated in + * the cache */ + int (*read_or_alloc_pages)(struct fscache_object *object, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t end_io_func, + void *context, + unsigned long gfp); + + /* request a backing block for a page be allocated in the cache so that + * it can be written directly */ + int (*allocate_page)(struct fscache_object *object, + struct page *page, + unsigned long gfp); + + /* write a page to its backing block in the cache */ + int (*write_page)(struct fscache_object *object, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + unsigned long gfp); + + /* write several pages to their backing blocks in the cache */ + int (*write_pages)(struct fscache_object *object, + struct pagevec *pagevec, + fscache_rw_complete_t end_io_func, + void *context, + unsigned long gfp); + + /* detach backing block from a bunch of pages */ + void (*uncache_pages)(struct fscache_object *object, + struct pagevec *pagevec); + + /* dissociate a cache from all the pages it was backing */ + void (*dissociate_pages)(struct fscache_cache *cache); +}; + +/*****************************************************************************/ +/* + * data file or index object cookie + * - a file will only appear in one cache + * - a request to cache a file may or may not be honoured, subject to + * constraints such as disc space + * - indexes files are created on disc just-in-time + */ +struct fscache_cookie { + atomic_t 
usage; /* number of users of this cookie */ + atomic_t children; /* number of children of this cookie */ + struct rw_semaphore sem; /* list creation vs scan lock */ + struct hlist_head backing_objects; /* object(s) backing this file/index */ + struct fscache_cookie_def *def; /* definition */ + struct fscache_cookie *parent; /* parent of this entry */ + struct fscache_netfs *netfs; /* owner network fs definition */ + void *netfs_data; /* back pointer to netfs */ +}; + +extern struct fscache_cookie fscache_fsdef_index; + +/*****************************************************************************/ +/* + * on-disc cache file or index handle + */ +struct fscache_object { + unsigned long flags; +#define FSCACHE_OBJECT_RELEASING 0 /* T if object is being released */ +#define FSCACHE_OBJECT_RECYCLING 1 /* T if object is being retired */ +#define FSCACHE_OBJECT_WITHDRAWN 2 /* T if object has been withdrawn */ + + struct list_head cache_link; /* link in cache->object_list */ + struct hlist_node cookie_link; /* link in cookie->backing_objects */ + struct fscache_cache *cache; /* cache that supplied this object */ + struct fscache_cookie *cookie; /* netfs's file/index object */ +}; + +static inline +void fscache_object_init(struct fscache_object *object) +{ + object->flags = 0; + INIT_LIST_HEAD(&object->cache_link); + INIT_HLIST_NODE(&object->cookie_link); + object->cache = NULL; + object->cookie = NULL; +} + +/* find the parent index object for a object */ +static inline +struct fscache_object *fscache_find_parent_object(struct fscache_object *object) +{ + struct fscache_object *parent; + struct fscache_cookie *cookie = object->cookie; + struct fscache_cache *cache = object->cache; + struct hlist_node *_p; + + hlist_for_each_entry(parent, _p, + &cookie->parent->backing_objects, + cookie_link + ) { + if (parent->cache == cache) + return parent; + } + + return NULL; +} + +/* get an extra reference to a context */ +static inline +void *fscache_get_context(struct fscache_cookie *cookie, void *context) +{ + if (cookie->def->get_context) + cookie->def->get_context(cookie->netfs_data, context); + return context; +} + +/* release an extra reference to a context */ +static inline +void fscache_put_context(struct fscache_cookie *cookie, void *context) +{ + if (cookie->def->put_context) + cookie->def->put_context(cookie->netfs_data, context); +} + +#endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h new file mode 100644 index 0000000..b9697f9 --- /dev/null +++ b/include/linux/fscache.h @@ -0,0 +1,496 @@ +/* fscache.h: general filesystem caching interface + * + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * NOTE!!! See: + * + * Documentation/filesystems/caching/netfs-api.txt + * + * for a description of the network filesystem interface declared here. 
+ */ + +#ifndef _LINUX_FSCACHE_H +#define _LINUX_FSCACHE_H + +#include +#include +#include +#include +#include + +struct pagevec; +struct fscache_cache_tag; +struct fscache_cookie; +struct fscache_netfs; +struct fscache_netfs_operations; + +typedef void (*fscache_rw_complete_t)(struct page *page, + void *context, + int error); + +/* result of index entry consultation */ +typedef enum { + FSCACHE_CHECKAUX_OKAY, /* entry okay as is */ + FSCACHE_CHECKAUX_NEEDS_UPDATE, /* entry requires update */ + FSCACHE_CHECKAUX_OBSOLETE, /* entry requires deletion */ +} fscache_checkaux_t; + +/*****************************************************************************/ +/* + * fscache cookie definition + */ +struct fscache_cookie_def +{ + /* name of cookie type */ + char name[16]; + + /* cookie type */ + uint8_t type; +#define FSCACHE_COOKIE_TYPE_INDEX 0 +#define FSCACHE_COOKIE_TYPE_DATAFILE 1 + + /* select the cache into which to insert an entry in this index + * - optional + * - should return a cache identifier or NULL to cause the cache to be + * inherited from the parent if possible or the first cache picked + * for a non-index file if not + */ + struct fscache_cache_tag *(*select_cache)(const void *parent_netfs_data, + const void *cookie_netfs_data); + + /* get an index key + * - should store the key data in the buffer + * - should return the amount of amount stored + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ + uint16_t (*get_key)(const void *cookie_netfs_data, + void *buffer, + uint16_t bufmax); + + /* get certain file attributes from the netfs data + * - this function can be absent for an index + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ + void (*get_attr)(const void *cookie_netfs_data, uint64_t *size); + + /* get the auxilliary data from netfs data + * - this function can be absent if the index carries no state data + * - should store the auxilliary data in the buffer + * - should return the amount of amount stored + * - not permitted to return an error + * - the netfs data from the cookie being used as the source is + * presented + */ + uint16_t (*get_aux)(const void *cookie_netfs_data, + void *buffer, + uint16_t bufmax); + + /* consult the netfs about the state of an object + * - this function can be absent if the index carries no state data + * - the netfs data from the cookie being used as the target is + * presented, as is the auxilliary data + */ + fscache_checkaux_t (*check_aux)(void *cookie_netfs_data, + const void *data, + uint16_t datalen); + + /* get an extra reference on a read context + * - this function can be absent if the completion function doesn't + * require a context + */ + void (*get_context)(void *cookie_netfs_data, void *context); + + /* release an extra reference on a read context + * - this function can be absent if the completion function doesn't + * require a context + */ + void (*put_context)(void *cookie_netfs_data, void *context); + + /* indicate pages that now have cache metadata retained + * - this function should mark the specified pages as now being cached + */ + void (*mark_pages_cached)(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec); + + /* indicate the cookie is no longer cached + * - this function is called when the backing store currently caching + * a cookie is removed + * - the netfs should use this to clean up any markers indicating + * cached pages + * - 
this is mandatory for any object that may have data + */ + void (*now_uncached)(void *cookie_netfs_data); +}; + +/* pattern used to fill dead space in an index entry */ +#define FSCACHE_INDEX_DEADFILL_PATTERN 0x79 + +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern struct fscache_cookie *__fscache_acquire_cookie(struct fscache_cookie *parent, + struct fscache_cookie_def *def, + void *netfs_data); + +extern void __fscache_relinquish_cookie(struct fscache_cookie *cookie, + int retire); + +extern void __fscache_update_cookie(struct fscache_cookie *cookie); +#endif + +static inline +struct fscache_cookie *fscache_acquire_cookie(struct fscache_cookie *parent, + struct fscache_cookie_def *def, + void *netfs_data) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (parent) + return __fscache_acquire_cookie(parent, def, netfs_data); +#endif + return NULL; +} + +static inline +void fscache_relinquish_cookie(struct fscache_cookie *cookie, + int retire) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + __fscache_relinquish_cookie(cookie, retire); +#endif +} + +static inline +void fscache_update_cookie(struct fscache_cookie *cookie) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + __fscache_update_cookie(cookie); +#endif +} + +/*****************************************************************************/ +/* + * pin or unpin a cookie in a cache + * - only available for data cookies + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_pin_cookie(struct fscache_cookie *cookie); +extern void __fscache_unpin_cookie(struct fscache_cookie *cookie); +#endif + +static inline +int fscache_pin_cookie(struct fscache_cookie *cookie) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_pin_cookie(cookie); +#endif + return -ENOBUFS; +} + +static inline +void fscache_unpin_cookie(struct fscache_cookie *cookie) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + __fscache_unpin_cookie(cookie); +#endif +} + +/*****************************************************************************/ +/* + * fscache cached network filesystem type + * - name, version and ops must be filled in before registration + * - all other fields will be set during registration + */ +struct fscache_netfs +{ + uint32_t version; /* indexing version */ + const char *name; /* filesystem name */ + struct fscache_cookie *primary_index; + struct fscache_netfs_operations *ops; + struct list_head link; /* internal link */ +}; + +struct fscache_netfs_operations +{ +}; + +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_register_netfs(struct fscache_netfs *netfs); +extern void __fscache_unregister_netfs(struct fscache_netfs *netfs); +#endif + +static inline +int fscache_register_netfs(struct fscache_netfs *netfs) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + return __fscache_register_netfs(netfs); +#else + return 0; +#endif +} + +static inline +void fscache_unregister_netfs(struct fscache_netfs *netfs) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + __fscache_unregister_netfs(netfs); +#endif +} + +/*****************************************************************************/ +/* + * look up a cache tag + * - cache tags are used to select specific caches in which to cache indexes + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern struct 
fscache_cache_tag *__fscache_lookup_cache_tag(const char *name); +extern void __fscache_release_cache_tag(struct fscache_cache_tag *tag); +#endif + +static inline +struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + return __fscache_lookup_cache_tag(name); +#else + return NULL; +#endif +} + +static inline +void fscache_release_cache_tag(struct fscache_cache_tag *tag) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + __fscache_release_cache_tag(tag); +#endif +} + +/*****************************************************************************/ +/* + * set the data size on a cached object + * - no pages beyond the end of the object will be accessible + * - returns -ENOBUFS if the file is not backed + * - returns -ENOSPC if a pinned file of that size can't be stored + * - returns 0 if okay + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_set_i_size(struct fscache_cookie *cookie, loff_t i_size); +#endif + +static inline +int fscache_set_i_size(struct fscache_cookie *cookie, loff_t i_size) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_set_i_size(cookie, i_size); +#endif + return -ENOBUFS; +} + +/*****************************************************************************/ +/* + * reserve data space for a cached object + * - returns -ENOBUFS if the file is not backed + * - returns -ENOSPC if there isn't enough space to honour the reservation + * - returns 0 if okay + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_reserve_space(struct fscache_cookie *cookie, loff_t size); +#endif + +static inline +int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_reserve_space(cookie, size); +#endif + return -ENOBUFS; +} + +/*****************************************************************************/ +/* + * read a page from the cache or allocate a block in which to store it + * - if the page is not backed by a file: + * - -ENOBUFS will be returned and nothing more will be done + * - else if the page is backed by a block in the cache: + * - a read will be started which will call end_io_func on completion + * - else if the page is unbacked: + * - a block will be allocated + * - -ENODATA will be returned + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); +#endif + +static inline +int fscache_read_or_alloc_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_read_or_alloc_page(cookie, page, end_io_func, + context, gfp); +#endif + return -ENOBUFS; +} + +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); +#endif + +static inline +int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t 
end_io_func, + void *context, + gfp_t gfp) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_read_or_alloc_pages(cookie, mapping, pages, + nr_pages, end_io_func, + context, gfp); +#endif + return -ENOBUFS; +} + +/* + * allocate a block in which to store a page + * - if the page is not backed by a file: + * - -ENOBUFS will be returned and nothing more will be done + * - else + * - a block will be allocated if there isn't one + * - 0 will be returned + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_alloc_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp); +#endif + +static inline +int fscache_alloc_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_alloc_page(cookie, page, gfp); +#endif + return -ENOBUFS; +} + +/* + * request a page be stored in the cache + * - this request may be ignored if no cache block is currently allocated, in + * which case it: + * - returns -ENOBUFS + * - if a cache block was already allocated: + * - a BIO will be dispatched to write the page (end_io_func will be called + * from the completion function) + * - returns 0 + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern int __fscache_write_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); + +extern int __fscache_write_pages(struct fscache_cookie *cookie, + struct pagevec *pagevec, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); +#endif + +static inline +int fscache_write_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_write_page(cookie, page, end_io_func, + context, gfp); +#endif + return -ENOBUFS; +} + +static inline +int fscache_write_pages(struct fscache_cookie *cookie, + struct pagevec *pagevec, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + return __fscache_write_pages(cookie, pagevec, end_io_func, + context, gfp); +#endif + return -ENOBUFS; +} + +/* + * indicate that caching is no longer required on a page + * - note: cannot cancel any outstanding BIOs between this page and the cache + */ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +extern void __fscache_uncache_page(struct fscache_cookie *cookie, + struct page *page); +extern void __fscache_uncache_pages(struct fscache_cookie *cookie, + struct pagevec *pagevec); +#endif + +static inline +void fscache_uncache_page(struct fscache_cookie *cookie, + struct page *page) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + __fscache_uncache_page(cookie, page); +#endif +} + +static inline +void fscache_uncache_pagevec(struct fscache_cookie *cookie, + struct pagevec *pagevec) +{ +#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) + if (cookie) + __fscache_uncache_pages(cookie, pagevec); +#endif +} + +#endif /* _LINUX_FSCACHE_H */ diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h index 26b4c83..15199cc 100644 --- a/include/linux/nfs4_mount.h +++ b/include/linux/nfs4_mount.h @@ -65,6 +65,7 @@ #define NFS4_MOUNT_INTR 0x0002 /* 1 */ #define NFS4_MOUNT_NOCTO 0x0010 /* 1 */ #define 
NFS4_MOUNT_NOAC 0x0020 /* 1 */ #define NFS4_MOUNT_STRICTLOCK 0x1000 /* 1 */ +#define NFS4_MOUNT_FSCACHE 0x4000 /* 1 */ #define NFS4_MOUNT_FLAGMASK 0xFFFF #endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 55ea853..71cf935 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -29,6 +29,7 @@ #include #include #include +#include /* * Enable debugging support for nfs client. @@ -180,6 +181,9 @@ #ifdef CONFIG_NFS_V4 int delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ +#ifdef CONFIG_NFS_FSCACHE + struct fscache_cookie *fscache; +#endif struct inode vfs_inode; }; @@ -209,7 +213,7 @@ #define NFS_SB(s) ((struct nfs_server * #define NFS_FH(inode) (&NFS_I(inode)->fh) #define NFS_SERVER(inode) (NFS_SB(inode->i_sb)) #define NFS_CLIENT(inode) (NFS_SERVER(inode)->client) -#define NFS_PROTO(inode) (NFS_SERVER(inode)->rpc_ops) +#define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops) #define NFS_ADDR(inode) (RPC_PEERADDR(NFS_CLIENT(inode))) #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) #define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) @@ -581,6 +585,8 @@ #define NFSDBG_XDR 0x0020 #define NFSDBG_FILE 0x0040 #define NFSDBG_ROOT 0x0080 #define NFSDBG_CALLBACK 0x0100 +#define NFSDBG_CLIENT 0x0200 +#define NFSDBG_FSCACHE 0x0400 #define NFSDBG_ALL 0xFFFF #ifdef __KERNEL__ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6b4a13c..d0c926f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -3,17 +3,88 @@ #define _NFS_FS_SB #include #include +#include struct nfs_iostats; /* + * The nfs_client identifies our client state to the server. + */ +struct nfs_client { + atomic_t cl_count; + int cl_cons_state; /* current construction state (-ve: init error) */ +#define NFS_CS_READY 0 /* ready to be used */ +#define NFS_CS_INITING 1 /* busy initialising */ + int cl_nfsversion; /* NFS protocol version */ + rpc_authflavor_t cl_authflavour; + unsigned long cl_svcs_state; /* NFS services state */ +#define NFS_CS_RPCIOD 0 /* - rpciod started */ +#define NFS_CS_CALLBACK 1 /* - callback started */ +#define NFS_CS_IDMAP 2 /* - idmap started */ + struct sockaddr_in cl_addr; /* server identifier */ + char * cl_hostname; /* hostname of server */ + struct list_head cl_share_link; /* link in global client list */ + struct list_head cl_superblocks; /* List of nfs_server structs */ + + struct rpc_clnt * cl_rpcclient; + struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ + unsigned long retrans_timeo; /* retransmit timeout */ + unsigned int retrans_count; /* number of retransmit tries */ + +#ifdef CONFIG_NFS_V4 + u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; + unsigned long cl_state; + + u32 cl_lockowner_id; + + /* + * The following rwsem ensures exclusive access to the server + * while we recover the state following a lease expiration. + */ + struct rw_semaphore cl_sem; + + struct list_head cl_delegations; + struct list_head cl_state_owners; + struct list_head cl_unused; + int cl_nunused; + spinlock_t cl_lock; + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + + struct rpc_wait_queue cl_rpcwaitq; + + /* used for the setclientid verifier */ + struct timespec cl_boot_time; + + /* idmapper */ + struct idmap * cl_idmap; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. 
+ */ + char cl_ipaddr[16]; + unsigned char cl_id_uniquifier; +#endif + +#ifdef CONFIG_NFS_FSCACHE + struct fscache_cookie *fscache; /* client index cache cookie */ +#endif +}; + +/* * NFS client parameters stored in the superblock. */ struct nfs_server { + struct nfs_client * nfs_client; /* shared client and NFS4 state */ + struct list_head client_link; /* List of other nfs_server structs + * that share the same client + */ + struct list_head master_link; /* link in master servers list */ struct rpc_clnt * client; /* RPC client handle */ - struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ - struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; int flags; /* various flags */ @@ -29,24 +100,14 @@ struct nfs_server { unsigned int acregmax; unsigned int acdirmin; unsigned int acdirmax; - unsigned long retrans_timeo; /* retransmit timeout */ - unsigned int retrans_count; /* number of retransmit tries */ unsigned int namelen; - char * hostname; /* remote hostname */ - struct nfs_fh fh; - struct sockaddr_in addr; + struct nfs_fsid fsid; + __u64 maxfilesize; /* maximum file size */ unsigned long mount_time; /* when this fs was mounted */ + dev_t s_dev; /* superblock dev numbers */ + #ifdef CONFIG_NFS_V4 - /* Our own IP address, as a null-terminated string. - * This is used to generate the clientid, and the callback address. - */ - char ip_addr[16]; - char * mnt_path; - struct nfs4_client * nfs4_state; /* all NFSv4 state starts here */ - struct list_head nfs4_siblings; /* List of other nfs_server structs - * that share the same clientid - */ u32 attr_bitmask[2];/* V4 bitmask representing the set of attributes supported on this filesystem */ @@ -54,6 +115,7 @@ #ifdef CONFIG_NFS_V4 that are supported on this filesystem */ #endif + void (*destroy)(struct nfs_server *); }; /* Server capabilities */ diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 102e560..15a9f3b 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -62,15 +62,15 @@ struct idmap_msg { #ifdef __KERNEL__ /* Forward declaration to make this header independent of others */ -struct nfs4_client; +struct nfs_client; -void nfs_idmap_new(struct nfs4_client *); -void nfs_idmap_delete(struct nfs4_client *); +int nfs_idmap_new(struct nfs_client *); +void nfs_idmap_delete(struct nfs_client *); -int nfs_map_name_to_uid(struct nfs4_client *, const char *, size_t, __u32 *); -int nfs_map_group_to_gid(struct nfs4_client *, const char *, size_t, __u32 *); -int nfs_map_uid_to_name(struct nfs4_client *, __u32, char *); -int nfs_map_gid_to_group(struct nfs4_client *, __u32, char *); +int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *); +int nfs_map_group_to_gid(struct nfs_client *, const char *, size_t, __u32 *); +int nfs_map_uid_to_name(struct nfs_client *, __u32, char *); +int nfs_map_gid_to_group(struct nfs_client *, __u32, char *); extern unsigned int nfs_idmap_cache_timeout; #endif /* __KERNEL__ */ diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index 659c754..278bb4e 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -61,6 +61,7 @@ #define NFS_MOUNT_BROKEN_SUID 0x0400 /* #define NFS_MOUNT_NOACL 0x0800 /* 4 */ #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ +#define NFS_MOUNT_FSCACHE 0x4000 #define NFS_MOUNT_FLAGMASK 0xFFFF 
#endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2d3fb64..24ceac1 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -770,6 +770,9 @@ struct nfs_rpc_ops { int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); + int (*lookupfh)(struct nfs_server *, struct nfs_fh *, + struct qstr *, struct nfs_fh *, + struct nfs_fattr *); int (*getattr) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); int (*setattr) (struct dentry *, struct nfs_fattr *, @@ -806,6 +809,7 @@ struct nfs_rpc_ops { struct nfs_fsinfo *); int (*pathconf) (struct nfs_server *, struct nfs_fh *, struct nfs_pathconf *); + int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); void (*read_setup) (struct nfs_read_data *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5748642..6e017b7 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -71,7 +71,7 @@ #define PG_lru 5 #define PG_active 6 #define PG_slab 7 /* slab debug (Suparna wants this) */ -#define PG_checked 8 /* kill me in 2.5.. */ +#define PG_fs_misc 8 #define PG_arch_1 9 #define PG_reserved 10 #define PG_private 11 /* Has something at ->private */ @@ -161,10 +161,6 @@ #else #define PageHighMem(page) 0 /* needed to optimize away at compile time */ #endif -#define PageChecked(page) test_bit(PG_checked, &(page)->flags) -#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) -#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags) - #define PageReserved(page) test_bit(PG_reserved, &(page)->flags) #define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) #define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) @@ -263,4 +259,13 @@ static inline void set_page_writeback(st test_set_page_writeback(page); } +/* + * Filesystem-specific page bit testing + */ +#define PageFsMisc(page) test_bit(PG_fs_misc, &(page)->flags) +#define SetPageFsMisc(page) set_bit(PG_fs_misc, &(page)->flags) +#define TestSetPageFsMisc(page) test_and_set_bit(PG_fs_misc, &(page)->flags) +#define ClearPageFsMisc(page) clear_bit(PG_fs_misc, &(page)->flags) +#define TestClearPageFsMisc(page) test_and_clear_bit(PG_fs_misc, &(page)->flags) + #endif /* PAGE_FLAGS_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0a2f5d2..acc3481 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -170,6 +170,23 @@ static inline void wait_on_page_writebac extern void end_page_writeback(struct page *page); /* + * Wait for filesystem-specific page synchronisation to complete + */ +static inline void wait_on_page_fs_misc(struct page *page) +{ + if (PageFsMisc(page)) + wait_on_page_bit(page, PG_fs_misc); +} + +extern void fastcall end_page_fs_misc(struct page *page); + +/* + * permit installation of a state change monitor in the queue for a page + */ +extern void install_page_waitqueue_monitor(struct page *page, + wait_queue_t *monitor); + +/* * Fault a userspace page into pagetables. Return non-zero on a fault. * * This assumes that two userspace pages are always sufficient. 
That's diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e4b1a4d..5bbbc79 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -794,6 +794,7 @@ enum FS_AIO_NR=18, /* current system-wide number of aio requests */ FS_AIO_MAX_NR=19, /* system-wide maximum number of aio requests */ FS_INOTIFY=20, /* inotify submenu */ + FS_FILE_KERNEL=21, /* int: number of internal kernel files */ }; /* /proc/sys/fs/quota/ */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ae40ac8..61b484f 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1314,6 +1314,8 @@ #endif context->names[idx].ino = (unsigned long)-1; } +EXPORT_SYMBOL_GPL(__audit_inode_child); + /** * auditsc_get_stamp - get local copies of audit_context values * @ctx: audit_context for the task diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 362a0cc..32043a2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -52,6 +52,9 @@ #include extern int proc_nr_files(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int proc_files_kernel(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ @@ -993,6 +996,14 @@ static ctl_table fs_table[] = { .proc_handler = &proc_dointvec, }, { + .ctl_name = FS_FILE_KERNEL, + .procname = "file-kernel", + .data = &files_stat, + .maxlen = 1*sizeof(int), + .mode = 0444, + .proc_handler = &proc_files_kernel, + }, + { .ctl_name = FS_DENTRY, .procname = "dentry-state", .data = &dentry_stat, diff --git a/mm/filemap.c b/mm/filemap.c index d087fc3..2d50899 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -520,6 +520,18 @@ void fastcall wait_on_page_bit(struct pa } EXPORT_SYMBOL(wait_on_page_bit); +void install_page_waitqueue_monitor(struct page *page, wait_queue_t *monitor) +{ + wait_queue_head_t *q = page_waitqueue(page); + unsigned long flags; + + spin_lock_irqsave(&q->lock, flags); + __add_wait_queue(q, monitor); + spin_unlock_irqrestore(&q->lock, flags); +} + +EXPORT_SYMBOL_GPL(install_page_waitqueue_monitor); + /** * unlock_page - unlock a locked page * @page: the page @@ -577,6 +589,23 @@ void fastcall __lock_page(struct page *p } EXPORT_SYMBOL(__lock_page); +/* + * Note completion of filesystem specific page synchronisation + * + * This is used to allow a page to be written to a filesystem cache in the + * background without holding up the completion of readpage + */ +void fastcall end_page_fs_misc(struct page *page) +{ + smp_mb__before_clear_bit(); + if (!TestClearPageFsMisc(page)) + BUG(); + smp_mb__after_clear_bit(); + __wake_up_bit(page_waitqueue(page), &page->flags, PG_fs_misc); +} + +EXPORT_SYMBOL(end_page_fs_misc); + /** * find_get_page - find and get a page reference * @mapping: the address_space to search @@ -2220,6 +2249,97 @@ zero_length_segment: } EXPORT_SYMBOL(generic_file_buffered_write); +/* + * This writes the data from the source page to the specified page offset in + * the nominated file + * - the source page does not need to have any association with the file or the + * page offset + */ +int +generic_file_buffered_write_one_kernel_page(struct file *file, + pgoff_t index, + struct page *src) +{ + struct address_space *mapping = file->f_mapping; + struct address_space_operations *a_ops = mapping->a_ops; + struct pagevec lru_pvec; + struct page *page, *cached_page = NULL; + long status = 0; + + pagevec_init(&lru_pvec, 0); + + page = __grab_cache_page(mapping, index, &cached_page, 
&lru_pvec); + if (!page) { + BUG_ON(cached_page); + return -ENOMEM; + } + + status = a_ops->prepare_write(file, page, 0, PAGE_CACHE_SIZE); + if (unlikely(status)) { + loff_t isize = i_size_read(mapping->host); + + if (status != AOP_TRUNCATED_PAGE) + unlock_page(page); + page_cache_release(page); + if (status == AOP_TRUNCATED_PAGE) + goto sync; + + /* prepare_write() may have instantiated a few blocks outside + * i_size. Trim these off again. + */ + if ((1ULL << (index + 1)) > isize) + vmtruncate(mapping->host, isize); + goto sync; + } + + copy_highpage(page, src); + flush_dcache_page(page); + + status = a_ops->commit_write(file, page, 0, PAGE_CACHE_SIZE); + if (status == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto sync; + } + + if (status > 0) + status = 0; + + unlock_page(page); + mark_page_accessed(page); + page_cache_release(page); + if (status < 0) + return status; + + balance_dirty_pages_ratelimited(mapping); + cond_resched(); + +sync: + if (cached_page) + page_cache_release(cached_page); + + /* + * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC + */ + if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(mapping->host))) { + if (!a_ops->writepage) + status = generic_osync_inode( + mapping->host, mapping, + OSYNC_METADATA | OSYNC_DATA); + } + + /* + * If we get here for O_DIRECT writes then we must have fallen through + * to buffered writes (block instantiation inside i_size). So we sync + * the file data here, to try to honour O_DIRECT expectations. + */ + if (unlikely(file->f_flags & O_DIRECT)) + status = filemap_write_and_wait(mapping); + + pagevec_lru_add(&lru_pvec); + return status; +} +EXPORT_SYMBOL(generic_file_buffered_write_one_kernel_page); + static ssize_t __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) diff --git a/mm/migrate.c b/mm/migrate.c index 3f1e0c2..08c9fff 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -348,8 +348,8 @@ static void migrate_page_copy(struct pag SetPageUptodate(newpage); if (PageActive(page)) SetPageActive(newpage); - if (PageChecked(page)) - SetPageChecked(newpage); + if (PageFsMisc(page)) + SetPageFsMisc(newpage); if (PageMappedToDisk(page)) SetPageMappedToDisk(newpage); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 54a4f53..8bb003b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -550,7 +550,7 @@ static int prep_new_page(struct page *pa page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_referenced | 1 << PG_arch_1 | - 1 << PG_checked | 1 << PG_mappedtodisk); + 1 << PG_fs_misc | 1 << PG_mappedtodisk); set_page_private(page, 0); set_page_refcounted(page); kernel_map_pages(page, 1 << order, 1); diff --git a/mm/readahead.c b/mm/readahead.c index aa7ec42..86a40db 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -14,6 +14,7 @@ #include #include #include #include +#include void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { @@ -117,6 +118,26 @@ static inline unsigned long get_next_ra_ #define list_to_page(head) (list_entry((head)->prev, struct page, lru)) +/* + * see if a page needs releasing upon read_cache_pages() failure + * - the caller of read_cache_pages() may have set PG_private before calling, + * such as the NFS fs marking pages that are cached locally on disk, thus we + * need to give the fs a chance to clean up in the event of an error + */ +static inline void read_cache_pages_release_page(struct address_space *mapping, + struct page *page) +{ + if (PagePrivate(page)) { + if (TestSetPageLocked(page)) 
+ BUG(); + page->mapping = mapping; + try_to_release_page(page, GFP_KERNEL); + page->mapping = NULL; + unlock_page(page); + } + page_cache_release(page); +} + /** * read_cache_pages - populate an address space with some pages & start reads against them * @mapping: the address_space @@ -140,7 +161,7 @@ int read_cache_pages(struct address_spac page = list_to_page(pages); list_del(&page->lru); if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) { - page_cache_release(page); + read_cache_pages_release_page(mapping, page); continue; } ret = filler(data, page); @@ -152,7 +173,7 @@ int read_cache_pages(struct address_spac victim = list_to_page(pages); list_del(&victim->lru); - page_cache_release(victim); + read_cache_pages_release_page(mapping, victim); } break; }
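
As a rough usage sketch of the netfs-facing interface declared above in include/linux/fscache.h: a hypothetical filesystem (called "examplefs" below; all examplefs_* names, including examplefs_readpage_from_server, are invented for illustration and assume the usual container_of() inode embedding) would register itself once, hang a data-file cookie off its primary index for each inode, and try the cache before falling back to the server:

/*
 * Illustrative only: one plausible way for a netfs to drive the cookie API;
 * the examplefs_* symbols are hypothetical and not part of this patch.
 */
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/fscache.h>

struct examplefs_inode {
	u64			fileid;		/* unique file identifier */
	struct fscache_cookie	*fscache;	/* cookie for this file's data */
	struct inode		vfs_inode;
};

/* fall-back path that reads the page from the server (defined elsewhere) */
int examplefs_readpage_from_server(struct file *file, struct page *page);

/* the index key for a data file is simply its file ID */
static uint16_t examplefs_get_key(const void *cookie_netfs_data,
				  void *buffer, uint16_t bufmax)
{
	const struct examplefs_inode *ei = cookie_netfs_data;
	uint16_t len = sizeof(ei->fileid);

	if (len > bufmax)
		len = bufmax;
	memcpy(buffer, &ei->fileid, len);
	return len;
}

static struct fscache_cookie_def examplefs_file_cookie_def = {
	.name		= "examplefs.data",
	.type		= FSCACHE_COOKIE_TYPE_DATAFILE,
	.get_key	= examplefs_get_key,
};

static struct fscache_netfs_operations examplefs_cache_ops = {
};

static struct fscache_netfs examplefs_cache_netfs = {
	.name		= "examplefs",
	.version	= 0,
	.ops		= &examplefs_cache_ops,
};

/* module initialisation: ->primary_index is filled in by registration */
static int examplefs_fscache_register(void)
{
	return fscache_register_netfs(&examplefs_cache_netfs);
}

/* per-inode setup: attach a data cookie below the primary index */
static void examplefs_fscache_init_inode(struct examplefs_inode *ei)
{
	ei->fscache = fscache_acquire_cookie(
		examplefs_cache_netfs.primary_index,
		&examplefs_file_cookie_def, ei);
}

/* completion routine run when the cache finishes reading a page */
static void examplefs_readpage_from_cache_done(struct page *page,
					       void *context, int error)
{
	if (!error)
		SetPageUptodate(page);
	unlock_page(page);
}

/* ->readpage(): try the cache first, then the server */
static int examplefs_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct examplefs_inode *ei =
		container_of(inode, struct examplefs_inode, vfs_inode);
	int ret;

	ret = fscache_read_or_alloc_page(ei->fscache, page,
					 examplefs_readpage_from_cache_done,
					 NULL, GFP_KERNEL);
	switch (ret) {
	case 0:		/* read dispatched; callback will unlock the page */
		return 0;
	case -ENODATA:	/* backing block allocated but holds no data yet */
	case -ENOBUFS:	/* page not backed by the cache at all */
	default:	/* page is still locked; the server path handles it */
		return examplefs_readpage_from_server(file, page);
	}
}

On -ENODATA the netfs would, after fetching the page from the server, normally hand it back with fscache_write_page() so the freshly allocated block gets populated; when a page is evicted it calls fscache_uncache_page(), and fscache_relinquish_cookie() (with retire set if the file was deleted) drops the cookie again, per the semantics documented in the header above.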