All of the above --- Index: linux-2.6.12-rc3/fs/Kconfig =================================================================== --- linux-2.6.12-rc3.orig/fs/Kconfig +++ linux-2.6.12-rc3/fs/Kconfig @@ -1318,6 +1318,7 @@ config NFS_FS depends on INET select LOCKD select SUNRPC + select NFS_ACL_SUPPORT if NFS_V3_ACL help If you are connected to some other (usually local) Unix computer (using SLIP, PLIP, PPP or Ethernet) and want to mount files residing @@ -1360,6 +1361,16 @@ config NFS_V3 If unsure, say Y. +config NFS_V3_ACL + bool "Provide client support for the NFSv3 ACL protocol extension" + depends on NFS_V3 + help + Implement the NFSv3 ACL protocol extension for manipulating POSIX + Access Control Lists. The server should also be compiled with + the NFSv3 ACL protocol extension; see the CONFIG_NFSD_V3_ACL option. + + If unsure, say N. + config NFS_V4 bool "Provide NFSv4 client support (EXPERIMENTAL)" depends on NFS_FS && EXPERIMENTAL @@ -1403,6 +1414,7 @@ config NFSD select LOCKD select SUNRPC select EXPORTFS + select NFS_ACL_SUPPORT if NFSD_V3_ACL || NFSD_V2_ACL help If you want your Linux box to act as an NFS *server*, so that other computers on your local network which support NFS can access certain @@ -1426,6 +1438,10 @@ config NFSD To compile the NFS server support as a module, choose M here: the module will be called nfsd. If unsure, say N. +config NFSD_V2_ACL + bool + depends on NFSD + config NFSD_V3 bool "Provide NFSv3 server support" depends on NFSD @@ -1433,6 +1449,16 @@ config NFSD_V3 If you would like to include the NFSv3 server as well as the NFSv2 server, say Y here. If unsure, say Y. +config NFSD_V3_ACL + bool "Provide server support for the NFSv3 ACL protocol extension" + depends on NFSD_V3 + select NFSD_V2_ACL + help + Implement the NFSv3 ACL protocol extension for manipulating POSIX + Access Control Lists on exported file systems. NFS clients should + be compiled with the NFSv3 ACL protocol extension; see the + CONFIG_NFS_V3_ACL option. If unsure, say N. + config NFSD_V4 bool "Provide NFSv4 server support (EXPERIMENTAL)" depends on NFSD_V3 && EXPERIMENTAL @@ -1477,6 +1503,15 @@ config LOCKD_V4 config EXPORTFS tristate +config NFS_ACL_SUPPORT + tristate + select FS_POSIX_ACL + +config NFS_COMMON + bool + depends on NFSD || NFS_FS + default y + config SUNRPC tristate @@ -1516,6 +1551,20 @@ config RPCSEC_GSS_SPKM3 If unsure, say N. +config RPCSEC_GSS_KEYRING + bool "Secure RPC: keyring support (EXPERIMENTAL)" + depends on SUNRPC_GSS && KEYS && EXPERIMENTAL + help + Use the new RPCSEC_GSS upcall mechanism based on keyrings. + This allows individual threads, processes or groups of + processes to specify their own authentication tokens, + providing much the same functionality that AFS pags used to. + + Note: requires the new helper program /sbin/request-key, as + well as an updated rpc.gssd daemon in order to work. + + If unsure, say N + config SMB_FS tristate "SMB file system support (to mount Windows shares etc.)" depends on INET Index: linux-2.6.12-rc3/fs/Makefile =================================================================== --- linux-2.6.12-rc3.orig/fs/Makefile +++ linux-2.6.12-rc3/fs/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o +obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_QUOTA) += dquot.o obj-$(CONFIG_QFMT_V1) += quota_v1.o Index: linux-2.6.12-rc3/fs/exec.c =================================================================== --- linux-2.6.12-rc3.orig/fs/exec.c +++ linux-2.6.12-rc3/fs/exec.c @@ -123,7 +123,7 @@ asmlinkage long sys_uselib(const char __ struct nameidata nd; int error; - nd.intent.open.flags = FMODE_READ; + nd_init_open_intent(&nd, FMODE_READ, 0); error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); if (error) goto out; @@ -136,7 +136,7 @@ asmlinkage long sys_uselib(const char __ if (error) goto exit; - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = nd_open_file(&nd, O_RDONLY); error = PTR_ERR(file); if (IS_ERR(file)) goto out; @@ -164,7 +164,7 @@ asmlinkage long sys_uselib(const char __ out: return error; exit: - path_release(&nd); + path_release_open_intent(&nd); goto out; } @@ -491,7 +491,7 @@ struct file *open_exec(const char *name) int err; struct file *file; - nd.intent.open.flags = FMODE_READ; + nd_init_open_intent(&nd, FMODE_READ, 0); err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); file = ERR_PTR(err); @@ -505,7 +505,7 @@ struct file *open_exec(const char *name) err = -EACCES; file = ERR_PTR(err); if (!err) { - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = nd_open_file(&nd, O_RDONLY); if (!IS_ERR(file)) { err = deny_write_access(file); if (err) { @@ -517,7 +517,7 @@ out: return file; } } - path_release(&nd); + path_release_open_intent(&nd); } goto out; } Index: linux-2.6.12-rc3/fs/lockd/host.c =================================================================== --- linux-2.6.12-rc3.orig/fs/lockd/host.c +++ linux-2.6.12-rc3/fs/lockd/host.c @@ -189,17 +189,15 @@ nlm_bind_host(struct nlm_host *host) goto forgetit; xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); + xprt->nocong = 1; /* No congestion control for NLM */ + xprt->resvport = 1; /* NLM requires a reserved port */ /* Existing NLM servers accept AUTH_UNIX only */ clnt = rpc_create_client(xprt, host->h_name, &nlm_program, host->h_version, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) { - xprt_destroy(xprt); + if (IS_ERR(clnt)) goto forgetit; - } clnt->cl_autobind = 1; /* turn on pmap queries */ - xprt->nocong = 1; /* No congestion control for NLM */ - xprt->resvport = 1; /* NLM requires a reserved port */ host->h_rpcclnt = clnt; } Index: linux-2.6.12-rc3/fs/lockd/mon.c =================================================================== --- linux-2.6.12-rc3.orig/fs/lockd/mon.c +++ linux-2.6.12-rc3/fs/lockd/mon.c @@ -115,20 +115,19 @@ nsm_create(void) xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL); if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; + xprt->resvport = 1; /* NSM requires a reserved port */ clnt = rpc_create_client(xprt, "localhost", &nsm_program, SM_VERSION, RPC_AUTH_NULL); if (IS_ERR(clnt)) - goto out_destroy; + goto out_err; clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; - xprt->resvport = 1; /* NSM requires a reserved port */ return clnt; -out_destroy: - xprt_destroy(xprt); +out_err: return clnt; } Index: linux-2.6.12-rc3/fs/namei.c =================================================================== --- linux-2.6.12-rc3.orig/fs/namei.c +++ linux-2.6.12-rc3/fs/namei.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -318,6 +319,19 @@ void path_release_on_umount(struct namei } /* + * Open intents have to release any file pointer that was allocated + * but not used by the VFS. + */ +void path_release_open_intent(struct nameidata *nd) +{ + if ((nd->flags & LOOKUP_OPEN) && nd->intent.open.file != NULL) { + fput(nd->intent.open.file); + nd->intent.open.file = NULL; + } + path_release(nd); +} + +/* * Internal lookup() using the new generic dcache. * SMP-safe */ @@ -714,6 +728,7 @@ static fastcall int __link_path_walk(con struct qstr this; unsigned int c; + nd->flags |= LOOKUP_CONTINUE; err = exec_permission_lite(inode, nd); if (err == -EAGAIN) { err = permission(inode, MAY_EXEC, nd); @@ -766,7 +781,6 @@ static fastcall int __link_path_walk(con if (err < 0) break; } - nd->flags |= LOOKUP_CONTINUE; /* This does the actual lookups.. */ err = do_lookup(nd, &this, &next); if (err) @@ -1406,8 +1420,7 @@ int open_namei(const char * pathname, in acc_mode |= MAY_APPEND; /* Fill in the open() intent data */ - nd->intent.open.flags = flag; - nd->intent.open.create_mode = mode; + nd_init_open_intent(nd, flag, mode); /* * The simplest case - just a plain lookup. @@ -1498,7 +1511,7 @@ ok: exit_dput: dput(dentry); exit: - path_release(nd); + path_release_open_intent(nd); return error; do_link: Index: linux-2.6.12-rc3/fs/namespace.c =================================================================== --- linux-2.6.12-rc3.orig/fs/namespace.c +++ linux-2.6.12-rc3/fs/namespace.c @@ -265,6 +265,44 @@ struct seq_operations mounts_op = { .show = show_vfsmnt }; +static int show_vfsstat(struct seq_file *m, void *v) +{ + struct vfsmount *mnt = v; + int err = 0; + + /* device */ + if (mnt->mnt_devname) { + seq_puts(m, "device "); + mangle(m, mnt->mnt_devname); + } else + seq_puts(m, "no device"); + + /* mount point */ + seq_puts(m, " mounted on "); + seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + seq_putc(m, ' '); + + /* file system type */ + seq_puts(m, "with fstype "); + mangle(m, mnt->mnt_sb->s_type->name); + + /* optional statistics */ + if (mnt->mnt_sb->s_op->show_stats) { + seq_putc(m, ' '); + err = mnt->mnt_sb->s_op->show_stats(m, mnt); + } + + seq_putc(m, '\n'); + return err; +} + +struct seq_operations mountstats_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_vfsstat, +}; + /** * may_umount_tree - check if a mount tree is busy * @mnt: root of mount tree Index: linux-2.6.12-rc3/fs/nfs/Makefile =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/Makefile +++ linux-2.6.12-rc3/fs/nfs/Makefile @@ -5,9 +5,11 @@ obj-$(CONFIG_NFS_FS) += nfs.o nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \ - proc.o read.o symlink.o unlink.o write.o + proc.o read.o symlink.o unlink.o write.o \ + namespace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o +nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o Index: linux-2.6.12-rc3/fs/nfs/callback.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/callback.c +++ linux-2.6.12-rc3/fs/nfs/callback.c @@ -14,6 +14,7 @@ #include #include #include +#include "nfs4_fs.h" #include "callback.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK Index: linux-2.6.12-rc3/fs/nfs/callback_proc.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/callback_proc.c +++ linux-2.6.12-rc3/fs/nfs/callback_proc.c @@ -8,6 +8,7 @@ #include #include #include +#include "nfs4_fs.h" #include "callback.h" #include "delegation.h" Index: linux-2.6.12-rc3/fs/nfs/callback_xdr.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/callback_xdr.c +++ linux-2.6.12-rc3/fs/nfs/callback_xdr.c @@ -10,6 +10,7 @@ #include #include #include +#include "nfs4_fs.h" #include "callback.h" #define CB_OP_TAGLEN_MAXSZ (512) @@ -410,7 +411,6 @@ static int nfs4_callback_compound(struct xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); - rqstp->rq_res.head[0].iov_len = PAGE_SIZE; xdr_init_encode(&xdr_out, &rqstp->rq_res, p); decode_compound_hdr_arg(&xdr_in, &hdr_arg); Index: linux-2.6.12-rc3/fs/nfs/delegation.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/delegation.c +++ linux-2.6.12-rc3/fs/nfs/delegation.c @@ -16,6 +16,7 @@ #include #include +#include "nfs4_fs.h" #include "delegation.h" static struct nfs_delegation *nfs_alloc_delegation(void) @@ -194,6 +195,31 @@ restart: } /* + * Return a delegation for clear_inode() + */ +void nfs_inode_clear_delegation(struct inode *inode) +{ + struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + + down_read(&clp->cl_sem); + spin_lock(&clp->cl_lock); + delegation = nfsi->delegation; + if (delegation != NULL) { + list_del_init(&delegation->super_list); + nfsi->delegation = NULL; + nfsi->delegation_state = 0; + } + spin_unlock(&clp->cl_lock); + up_read(&clp->cl_sem); + if (delegation != NULL) { + nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid); + nfs_free_delegation(delegation); + } +} + +/* * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. */ void nfs_handle_cb_pathdown(struct nfs4_client *clp) Index: linux-2.6.12-rc3/fs/nfs/delegation.h =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/delegation.h +++ linux-2.6.12-rc3/fs/nfs/delegation.h @@ -27,6 +27,7 @@ int nfs_inode_set_delegation(struct inod void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); int nfs_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); +void nfs_inode_clear_delegation(struct inode *inode); struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); void nfs_return_all_delegations(struct super_block *sb); Index: linux-2.6.12-rc3/fs/nfs/dir.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/dir.c +++ linux-2.6.12-rc3/fs/nfs/dir.c @@ -27,11 +27,13 @@ #include #include #include +#include "nfs_iostat.h" #include #include #include #include +#include "nfs4_fs.h" #include "delegation.h" #define NFS_PARANOIA 1 @@ -50,8 +52,10 @@ static int nfs_mknod(struct inode *, str static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); static int nfs_fsync_dir(struct file *, struct dentry *, int); +static loff_t nfs_llseek_dir(struct file *, loff_t, int); struct file_operations nfs_dir_operations = { + .llseek = nfs_llseek_dir, .read = generic_read_dir, .readdir = nfs_readdir, .open = nfs_opendir, @@ -74,6 +78,27 @@ struct inode_operations nfs_dir_inode_op .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V3 +struct inode_operations nfs3_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; +#endif /* CONFIG_NFS_V3 */ + #ifdef CONFIG_NFS_V4 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); @@ -90,6 +115,9 @@ struct inode_operations nfs4_dir_inode_o .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, }; #endif /* CONFIG_NFS_V4 */ @@ -116,7 +144,8 @@ typedef struct { struct page *page; unsigned long page_index; u32 *ptr; - u64 target; + u64 *dir_cookie; + loff_t current_index; struct nfs_entry *entry; decode_dirent_t decode; int plus; @@ -164,12 +193,10 @@ int nfs_readdir_filler(nfs_readdir_descr NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either - * throught inode->i_sem or some other mechanism. + * through inode->i_sem or some other mechanism. */ - if (page->index == 0) { - invalidate_inode_pages(inode->i_mapping); - NFS_I(inode)->readdir_timestamp = timestamp; - } + if (page->index == 0) + invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1); unlock_page(page); return 0; error: @@ -202,22 +229,22 @@ void dir_page_release(nfs_readdir_descri /* * Given a pointer to a buffer that has already been filled by a call - * to readdir, find the next entry. + * to readdir, find the next entry with cookie '*desc->dir_cookie'. * * If the end of the buffer has been reached, return -EAGAIN, if not, * return the offset within the buffer of the next entry to be * read. */ static inline -int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page) +int find_dirent(nfs_readdir_descriptor_t *desc) { struct nfs_entry *entry = desc->entry; int loop_count = 0, status; while((status = dir_decode(desc)) == 0) { - dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie); - if (entry->prev_cookie == desc->target) + dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie); + if (entry->prev_cookie == *desc->dir_cookie) break; if (loop_count++ > 200) { loop_count = 0; @@ -229,8 +256,44 @@ int find_dirent(nfs_readdir_descriptor_t } /* - * Find the given page, and call find_dirent() in order to try to - * return the next entry. + * Given a pointer to a buffer that has already been filled by a call + * to readdir, find the entry at offset 'desc->file->f_pos'. + * + * If the end of the buffer has been reached, return -EAGAIN, if not, + * return the offset within the buffer of the next entry to be + * read. + */ +static inline +int find_dirent_index(nfs_readdir_descriptor_t *desc) +{ + struct nfs_entry *entry = desc->entry; + int loop_count = 0, + status; + + for(;;) { + status = dir_decode(desc); + if (status) + break; + + dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index); + + if (desc->file->f_pos == desc->current_index) { + *desc->dir_cookie = entry->cookie; + break; + } + desc->current_index++; + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } + } + dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status); + return status; +} + +/* + * Find the given page, and call find_dirent() or find_dirent_index in + * order to try to return the next entry. */ static inline int find_dirent_page(nfs_readdir_descriptor_t *desc) @@ -253,7 +316,10 @@ int find_dirent_page(nfs_readdir_descrip /* NOTE: Someone else may have changed the READDIRPLUS flag */ desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ - status = find_dirent(desc, page); + if (*desc->dir_cookie != 0) + status = find_dirent(desc); + else + status = find_dirent_index(desc); if (status < 0) dir_page_release(desc); out: @@ -268,7 +334,8 @@ int find_dirent_page(nfs_readdir_descrip * Recurse through the page cache pages, and return a * filled nfs_entry structure of the next directory entry if possible. * - * The target for the search is 'desc->target'. + * The target for the search is '*desc->dir_cookie' if non-0, + * 'desc->file->f_pos' otherwise */ static inline int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) @@ -276,7 +343,16 @@ int readdir_search_pagecache(nfs_readdir int loop_count = 0; int res; - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target); + /* Always search-by-index from the beginning of the cache */ + if (*desc->dir_cookie == 0) { + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos); + desc->page_index = 0; + desc->entry->cookie = desc->entry->prev_cookie = 0; + desc->entry->eof = 0; + desc->current_index = 0; + } else + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); + for (;;) { res = find_dirent_page(desc); if (res != -EAGAIN) @@ -313,7 +389,7 @@ int nfs_do_filldir(nfs_readdir_descripto int loop_count = 0, res; - dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target); + dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie); for(;;) { unsigned d_type = DT_UNKNOWN; @@ -333,10 +409,11 @@ int nfs_do_filldir(nfs_readdir_descripto } res = filldir(dirent, entry->name, entry->len, - entry->prev_cookie, fileid, d_type); + file->f_pos, fileid, d_type); if (res < 0) break; - file->f_pos = desc->target = entry->cookie; + file->f_pos++; + *desc->dir_cookie = entry->cookie; if (dir_decode(desc) != 0) { desc->page_index ++; break; @@ -349,7 +426,7 @@ int nfs_do_filldir(nfs_readdir_descripto dir_page_release(desc); if (dentry != NULL) dput(dentry); - dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res); + dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; } @@ -375,14 +452,14 @@ int uncached_readdir(nfs_readdir_descrip struct page *page = NULL; int status; - dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target); + dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); page = alloc_page(GFP_HIGHUSER); if (!page) { status = -ENOMEM; goto out; } - desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target, + desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, *desc->dir_cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); @@ -391,7 +468,7 @@ int uncached_readdir(nfs_readdir_descrip desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { if ((status = dir_decode(desc)) == 0) - desc->entry->prev_cookie = desc->target; + desc->entry->prev_cookie = *desc->dir_cookie; } else status = -EIO; if (status < 0) @@ -412,8 +489,9 @@ int uncached_readdir(nfs_readdir_descrip goto out; } -/* The file offset position is now represented as a true offset into the - * page cache as is the case in most of the other filesystems. +/* The file offset position represents the dirent entry number. A + last cookie cache takes care of the common case of reading the + whole directory. */ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { @@ -426,6 +504,8 @@ static int nfs_readdir(struct file *filp struct nfs_fattr fattr; long res; + nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); + lock_kernel(); res = nfs_revalidate_inode(NFS_SERVER(inode), inode); @@ -435,15 +515,15 @@ static int nfs_readdir(struct file *filp } /* - * filp->f_pos points to the file offset in the page cache. - * but if the cache has meanwhile been zapped, we need to - * read from the last dirent to revalidate f_pos - * itself. + * filp->f_pos points to the dirent entry number. + * *desc->dir_cookie has the cookie for the next entry. We have + * to either find the entry with the appropriate number or + * revalidate the cookie. */ memset(desc, 0, sizeof(*desc)); desc->file = filp; - desc->target = filp->f_pos; + desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = NFS_USE_READDIRPLUS(inode); @@ -455,9 +535,10 @@ static int nfs_readdir(struct file *filp while(!desc->entry->eof) { res = readdir_search_pagecache(desc); + if (res == -EBADCOOKIE) { /* This means either end of directory */ - if (desc->entry->cookie != desc->target) { + if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc, dirent, filldir); if (res >= 0) @@ -490,6 +571,28 @@ static int nfs_readdir(struct file *filp return 0; } +loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) +{ + down(&filp->f_dentry->d_inode->i_sem); + switch (origin) { + case 1: + offset += filp->f_pos; + case 0: + if (offset >= 0) + break; + default: + offset = -EINVAL; + goto out; + } + if (offset != filp->f_pos) { + filp->f_pos = offset; + ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; + } +out: + up(&filp->f_dentry->d_inode->i_sem); + return offset; +} + /* * All directory operations under NFS are synchronous, so fsync() * is a dummy operation. @@ -594,6 +697,7 @@ static int nfs_lookup_revalidate(struct parent = dget_parent(dentry); lock_kernel(); dir = parent->d_inode; + nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; if (!inode) { @@ -710,6 +814,17 @@ int nfs_is_exclusive_create(struct inode return (nd->intent.open.flags & O_EXCL) != 0; } +static inline int nfs_reval_fsid(struct inode *dir, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + struct nfs_server *server = NFS_SERVER(dir); + + if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) + /* Revalidate fsid on root dir */ + return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode); + return 0; +} + static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { struct dentry *res; @@ -720,6 +835,7 @@ static struct dentry *nfs_lookup(struct dfprintk(VFS, "NFS: lookup(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); + nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); res = ERR_PTR(-ENAMETOOLONG); if (dentry->d_name.len > NFS_SERVER(dir)->namelen) @@ -731,10 +847,8 @@ static struct dentry *nfs_lookup(struct lock_kernel(); /* Revalidate parent directory attribute cache */ error = nfs_revalidate_inode(NFS_SERVER(dir), dir); - if (error < 0) { - res = ERR_PTR(error); - goto out_unlock; - } + if (error < 0) + goto out_err; /* If we're doing an exclusive create, optimize away the lookup */ if (nfs_is_exclusive_create(dir, nd)) @@ -743,10 +857,11 @@ static struct dentry *nfs_lookup(struct error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); if (error == -ENOENT) goto no_entry; - if (error < 0) { - res = ERR_PTR(error); - goto out_unlock; - } + if (error < 0) + goto out_err; + error = nfs_reval_fsid(dir, &fhandle, &fattr); + if (error < 0) + goto out_err; res = ERR_PTR(-EACCES); inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); if (!inode) @@ -761,6 +876,9 @@ out_unlock: unlock_kernel(); out: return res; +out_err: + res = ERR_PTR(error); + goto out_unlock; } #ifdef CONFIG_NFS_V4 @@ -791,7 +909,6 @@ static int is_atomic_open(struct inode * static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct dentry *res = NULL; - struct inode *inode = NULL; int error; /* Check that we are indeed trying to open this file */ @@ -805,8 +922,10 @@ static struct dentry *nfs_atomic_lookup( dentry->d_op = NFS_PROTO(dir)->dentry_ops; /* Let vfs_create() deal with O_EXCL */ - if (nd->intent.open.flags & O_EXCL) - goto no_entry; + if (nd->intent.open.flags & O_EXCL) { + d_add(dentry, NULL); + goto out; + } /* Open the file on the server */ lock_kernel(); @@ -819,18 +938,18 @@ static struct dentry *nfs_atomic_lookup( if (nd->intent.open.flags & O_CREAT) { nfs_begin_data_update(dir); - inode = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); nfs_end_data_update(dir); } else - inode = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); unlock_kernel(); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); + if (IS_ERR(res)) { + error = PTR_ERR(res); switch (error) { /* Make a negative dentry */ case -ENOENT: - inode = NULL; - break; + res = NULL; + goto out; /* This turned out not to be a regular file */ case -ELOOP: if (!(nd->intent.open.flags & O_NOFOLLOW)) @@ -838,13 +957,9 @@ static struct dentry *nfs_atomic_lookup( /* case -EISDIR: */ /* case -EINVAL: */ default: - res = ERR_PTR(error); goto out; } - } -no_entry: - res = d_add_unique(dentry, inode); - if (res != NULL) + } else if (res != NULL) dentry = res; nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); @@ -888,7 +1003,7 @@ static int nfs_open_revalidate(struct de */ lock_kernel(); verifier = nfs_save_change_attribute(dir); - ret = nfs4_open_revalidate(dir, dentry, openflags); + ret = nfs4_open_revalidate(dir, dentry, openflags, nd); if (!ret) nfs_set_verifier(dentry, verifier); unlock_kernel(); @@ -1011,7 +1126,7 @@ static int nfs_create(struct inode *dir, lock_kernel(); nfs_begin_data_update(dir); - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); nfs_end_data_update(dir); if (error != 0) goto out_err; @@ -1122,6 +1237,7 @@ static int nfs_sillyrename(struct inode dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, atomic_read(&dentry->d_count)); + nfs_inc_stats(dir, NFSIOS_SILLYRENAME); #ifdef NFS_PARANOIA if (!dentry->d_inode) @@ -1506,6 +1622,8 @@ int nfs_permission(struct inode *inode, struct rpc_cred *cred; int res = 0; + nfs_inc_stats(inode, NFSIOS_VFSACCESS); + if (mask == 0) goto out; /* Is this sys_access() ? */ Index: linux-2.6.12-rc3/fs/nfs/direct.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/direct.c +++ linux-2.6.12-rc3/fs/nfs/direct.c @@ -47,6 +47,7 @@ #include #include +#include "nfs_iostat.h" #include #include @@ -66,6 +67,7 @@ struct nfs_direct_req { struct kref kref; /* release manager */ struct list_head list; /* nfs_read_data structs */ wait_queue_head_t wait; /* wait for i/o completion */ + struct inode * inode; /* target file of I/O */ struct page ** pages; /* pages in our buffer */ unsigned int npages; /* count of pages */ atomic_t complete, /* i/os we're waiting for */ @@ -207,6 +209,8 @@ static void nfs_direct_read_result(struc { struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + nfs_add_stats(dreq->inode, NFSIOS_SERVERREADBYTES, data->res.count); + if (likely(status >= 0)) atomic_add(data->res.count, &dreq->count); else @@ -347,6 +351,7 @@ static ssize_t nfs_direct_read_seg(struc dreq->pages = pages; dreq->npages = nr_pages; + dreq->inode = inode; rpc_clnt_sigmask(clnt, &oldset); nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count, @@ -354,6 +359,8 @@ static ssize_t nfs_direct_read_seg(struc result = nfs_direct_read_wait(dreq, clnt->cl_intr); rpc_clnt_sigunmask(clnt, &oldset); + if (result > 0) + nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, result); return result; } @@ -517,7 +524,7 @@ retry: result = tot_bytes; out: - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); nfs_writedata_free(wdata); return result; @@ -571,11 +578,14 @@ static ssize_t nfs_direct_write(struct i break; return result; } + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result); tot_bytes += result; file_offset += result; if (result < size) break; } + if (tot_bytes > 0) + nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, tot_bytes); return tot_bytes; } Index: linux-2.6.12-rc3/fs/nfs/file.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/file.c +++ linux-2.6.12-rc3/fs/nfs/file.c @@ -22,6 +22,7 @@ #include #include #include +#include "nfs_iostat.h" #include #include #include @@ -37,6 +38,7 @@ static int nfs_file_open(struct inode *, struct file *); static int nfs_file_release(struct inode *, struct file *); +static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin); static int nfs_file_mmap(struct file *, struct vm_area_struct *); static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t); @@ -48,7 +50,7 @@ static int nfs_lock(struct file *filp, i static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); struct file_operations nfs_file_operations = { - .llseek = remote_llseek, + .llseek = nfs_file_llseek, .read = do_sync_read, .write = do_sync_write, .aio_read = nfs_file_read, @@ -70,6 +72,18 @@ struct inode_operations nfs_file_inode_o .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V3 +struct inode_operations nfs3_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; +#endif /* CONFIG_NFS_v3 */ + /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) @@ -89,18 +103,15 @@ static int nfs_check_flags(int flags) static int nfs_file_open(struct inode *inode, struct file *filp) { - struct nfs_server *server = NFS_SERVER(inode); - int (*open)(struct inode *, struct file *); int res; res = nfs_check_flags(filp->f_flags); if (res) return res; + nfs_inc_stats(inode, NFSIOS_VFSOPEN); lock_kernel(); - /* Do NFSv4 open() call */ - if ((open = server->rpc_ops->file_open) != NULL) - res = open(inode, filp); + res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp); unlock_kernel(); return res; } @@ -111,9 +122,65 @@ nfs_file_release(struct inode *inode, st /* Ensure that dirty pages are flushed out with the right creds */ if (filp->f_mode & FMODE_WRITE) filemap_fdatawrite(filp->f_mapping); + nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return NFS_PROTO(inode)->file_release(inode, filp); } +/** + * nfs_revalidate_file - Revalidate the page cache & related metadata + * @inode - pointer to inode struct + * @file - pointer to file + */ +static int nfs_revalidate_file(struct inode *inode, struct file *filp) +{ + int retval = 0; + + if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) + retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + nfs_revalidate_mapping(inode, filp->f_mapping); + return 0; +} + +/** + * nfs_revalidate_size - Revalidate the file size + * @inode - pointer to inode struct + * @file - pointer to struct file + * + * Revalidates the file length. This is basically a wrapper around + * nfs_revalidate_inode() that takes into account the fact that we may + * have cached writes (in which case we don't care about the server's + * idea of what the file length is), or O_DIRECT (in which case we + * shouldn't trust the cache). + */ +static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_inode *nfsi = NFS_I(inode); + + if (server->flags & NFS_MOUNT_NOAC) + goto force_reval; + if (filp->f_flags & O_DIRECT) + goto force_reval; + if (nfsi->npages != 0) + return 0; + if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) + return 0; +force_reval: + return __nfs_revalidate_inode(server, inode); +} + +static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) +{ + /* origin == SEEK_END => we must revalidate the cached file length */ + if (origin == 2) { + struct inode *inode = filp->f_mapping->host; + int retval = nfs_revalidate_file_size(inode, filp); + if (retval < 0) + return (loff_t)retval; + } + return remote_llseek(filp, offset, origin); +} + /* * Flush all dirty pages, and check for write errors. * @@ -129,6 +196,7 @@ nfs_file_flush(struct file *file) if ((file->f_mode & FMODE_WRITE) == 0) return 0; + nfs_inc_stats(inode, NFSIOS_VFSFLUSH); lock_kernel(); /* Ensure that data+attribute caches are up to date after close() */ status = nfs_wb_all(inode); @@ -158,9 +226,11 @@ nfs_file_read(struct kiocb *iocb, char _ dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long) pos); - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + result = nfs_revalidate_file(inode, iocb->ki_filp); if (!result) result = generic_file_aio_read(iocb, buf, count, pos); + if (result > 0) + nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); return result; } @@ -176,7 +246,7 @@ nfs_file_sendfile(struct file *filp, lof dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long long) *ppos); - res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + res = nfs_revalidate_file(inode, filp); if (!res) res = generic_file_sendfile(filp, ppos, count, actor, target); return res; @@ -192,7 +262,7 @@ nfs_file_mmap(struct file * file, struct dfprintk(VFS, "nfs: mmap(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); - status = nfs_revalidate_inode(NFS_SERVER(inode), inode); + status = nfs_revalidate_file(inode, file); if (!status) status = generic_file_mmap(file, vma); return status; @@ -212,6 +282,7 @@ nfs_fsync(struct file *file, struct dent dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + nfs_inc_stats(inode, NFSIOS_VFSFSYNC); lock_kernel(); status = nfs_wb_all(inode); if (!status) { @@ -281,15 +352,23 @@ nfs_file_write(struct kiocb *iocb, const result = -EBUSY; if (IS_SWAPFILE(inode)) goto out_swapfile; - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (result) - goto out; + /* + * O_APPEND implies that we must revalidate the file length. + */ + if (iocb->ki_filp->f_flags & O_APPEND) { + result = nfs_revalidate_file_size(inode, iocb->ki_filp); + if (result) + goto out; + } + nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); result = count; if (!count) goto out; result = generic_file_aio_write(iocb, buf, count, pos); + if (result > 0) + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, result); out: return result; @@ -430,13 +509,14 @@ static int nfs_lock(struct file *filp, i { struct inode * inode = filp->f_mapping->host; + if (!inode) + return -EINVAL; + dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n", inode->i_sb->s_id, inode->i_ino, fl->fl_type, fl->fl_flags, (long long)fl->fl_start, (long long)fl->fl_end); - - if (!inode) - return -EINVAL; + nfs_inc_stats(inode, NFSIOS_VFSLOCK); /* No mandatory locks over NFS */ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) Index: linux-2.6.12-rc3/fs/nfs/idmap.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/idmap.c +++ linux-2.6.12-rc3/fs/nfs/idmap.c @@ -46,10 +46,10 @@ #include #include -#include #include #include +#include "nfs4_fs.h" #define IDMAP_HASH_SZ 128 Index: linux-2.6.12-rc3/fs/nfs/inode.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/inode.c +++ linux-2.6.12-rc3/fs/nfs/inode.c @@ -27,11 +27,13 @@ #include #include #include +#include "nfs_iostat.h" #include #include #include #include #include +#include #include #include #include @@ -39,6 +41,7 @@ #include #include +#include "nfs4_fs.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -63,6 +66,8 @@ static void nfs_clear_inode(struct inode static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); +static int nfs_show_stats(struct seq_file *, struct vfsmount *); +static void nfs_zap_acl_cache(struct inode *); static struct rpc_program nfs_program; @@ -75,6 +80,7 @@ static struct super_operations nfs_sops .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, + .show_stats = nfs_show_stats, }; /* @@ -106,6 +112,75 @@ static struct rpc_program nfs_program = .pipe_dir_name = "/nfs", }; +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static struct rpc_version * nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = sizeof(nfsacl_version) / sizeof(nfsacl_version[0]), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; +#endif /* CONFIG_NFS_V3_ACL */ + +#ifdef CONFIG_SYSCTL +/* Follow the established convention in NLM */ +#define CTL_UNNUMBERED -2 + +static ctl_table nfs_sysctls[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nfs_mountpoint_timeout", + .data = &nfs_mountpoint_expiry_timeout, + .maxlen = sizeof(nfs_mountpoint_expiry_timeout), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { .ctl_name = 0 } +}; + +static ctl_table nfs_sysctl_dir[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nfs", + .mode = 0555, + .child = nfs_sysctls, + }, + { .ctl_name = 0 } +}; + +static ctl_table nfs_sysctl_root[] = { + { + .ctl_name = CTL_FS, + .procname = "fs", + .mode = 0555, + .child = nfs_sysctl_dir, + }, + { .ctl_name = 0 } +}; + +static struct ctl_table_header *nfs_sysctl_table; + +static inline int nfs_init_sysctl(void) +{ + nfs_sysctl_table = register_sysctl_table(nfs_sysctl_root, 0); + return nfs_sysctl_table != NULL ? 0 : -ENOMEM; +} + +static inline void nfs_destroy_sysctl(void) +{ + unregister_sysctl_table(nfs_sysctl_table); +} +#else +#define nfs_init_sysctl() (0) +#define nfs_destroy_sysctl() do { } while(0) +#endif /* CONFIG_SYSCTL */ + static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) { @@ -118,7 +193,7 @@ nfs_write_inode(struct inode *inode, int int flags = sync ? FLUSH_WAIT : 0; int ret; - ret = nfs_commit_inode(inode, 0, 0, flags); + ret = nfs_commit_inode(inode, flags); if (ret < 0) return ret; return 0; @@ -140,10 +215,6 @@ nfs_delete_inode(struct inode * inode) clear_inode(inode); } -/* - * For the moment, the only task for the NFS clear_inode method is to - * release the mmap credential - */ static void nfs_clear_inode(struct inode *inode) { @@ -152,6 +223,7 @@ nfs_clear_inode(struct inode *inode) nfs_wb_all(inode); BUG_ON (!list_empty(&nfsi->open_files)); + nfs_zap_acl_cache(inode); cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); @@ -167,6 +239,8 @@ nfs_umount_begin(struct super_block *sb) /* -EIO all pending I/O */ if ((rpc = server->client) != NULL) rpc_killall_tasks(rpc); + if ((rpc = server->client_acl) != NULL) + rpc_killall_tasks(rpc); } @@ -211,6 +285,14 @@ nfs_block_size(unsigned long bsize, unsi return nfs_block_bits(bsize, nrbitsp); } +static inline void +nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) +{ + sb->s_maxbytes = (loff_t)maxfilesize; + if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) + sb->s_maxbytes = MAX_LFS_FILESIZE; +} + /* * Obtain the root inode of the file system. */ @@ -227,6 +309,7 @@ nfs_get_root(struct super_block *sb, str return ERR_PTR(error); } + server->fsid = fsinfo->fattr->fsid; rooti = nfs_fhget(sb, rootfh, fsinfo->fattr); if (!rooti) return ERR_PTR(-ENOMEM); @@ -271,6 +354,12 @@ nfs_sb_init(struct super_block *sb, rpc_ } sb->s_root->d_op = server->rpc_ops->dentry_ops; + server->io_stats = nfs_alloc_iostats(); + if (!server->io_stats) { + no_root_error = -ENOMEM; + goto out_no_root; + } + /* Get some general file system info */ if (server->namelen == 0 && server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) @@ -322,9 +411,7 @@ nfs_sb_init(struct super_block *sb, rpc_ } server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; - sb->s_maxbytes = fsinfo.maxfilesize; - if (sb->s_maxbytes > MAX_LFS_FILESIZE) - sb->s_maxbytes = MAX_LFS_FILESIZE; + nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; @@ -362,17 +449,22 @@ nfs_create_client(struct nfs_server *ser if (!timeparms.to_retries) timeparms.to_retries = 5; + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + /* create transport and client */ xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP, &server->addr, &timeparms); if (IS_ERR(xprt)) { - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + dprintk("%s: cannot create RPC transport. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); return (struct rpc_clnt *)xprt; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, data->pseudoflavor); if (IS_ERR(clnt)) { - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); + dprintk("%s: cannot create RPC client. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); goto out_fail; } @@ -383,7 +475,6 @@ nfs_create_client(struct nfs_server *ser return clnt; out_fail: - xprt_destroy(xprt); return clnt; } @@ -427,21 +518,16 @@ nfs_fill_super(struct super_block *sb, s /* Check NFS protocol revision and initialize RPC op vector * and file handle pool. */ - if (server->flags & NFS_MOUNT_VER3) { #ifdef CONFIG_NFS_V3 + if (server->flags & NFS_MOUNT_VER3) { server->rpc_ops = &nfs_v3_clientops; server->caps |= NFS_CAP_READDIRPLUS; - if (data->version < 4) { - printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); - return -EIO; - } -#else - printk(KERN_NOTICE "NFS: NFSv3 not supported.\n"); - return -EIO; -#endif } else { server->rpc_ops = &nfs_v2_clientops; } +#else + server->rpc_ops = &nfs_v2_clientops; +#endif /* Fill in pseudoflavor for mount version < 5 */ if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) @@ -455,17 +541,34 @@ nfs_fill_super(struct super_block *sb, s return PTR_ERR(server->client); /* RFC 2623, sec 2.3.2 */ if (authflavor != RPC_AUTH_UNIX) { + struct rpc_auth *auth; + server->client_sys = rpc_clone_client(server->client); if (IS_ERR(server->client_sys)) return PTR_ERR(server->client_sys); - if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys)) - return -ENOMEM; + auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); + if (IS_ERR(auth)) + return PTR_ERR(auth); } else { atomic_inc(&server->client->cl_count); server->client_sys = server->client; } - if (server->flags & NFS_MOUNT_VER3) { +#ifdef CONFIG_NFS_V3_ACL + if (!(server->flags & NFS_MOUNT_NOACL)) { + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + /* No errors! Assume that Sun nfsacls are supported */ + if (!IS_ERR(server->client_acl)) + server->caps |= NFS_CAP_ACLS; + } +#else + server->flags &= ~NFS_MOUNT_NOACL; +#endif /* CONFIG_NFS_V3_ACL */ + /* + * The VFS shouldn't apply the umask to mode bits. We will + * do so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; sb->s_time_gran = 1; @@ -535,7 +638,7 @@ nfs_statfs(struct super_block *sb, struc } -static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { static struct proc_nfs_info { int flag; @@ -543,36 +646,120 @@ static int nfs_show_options(struct seq_f char *nostr; } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, - { NFS_MOUNT_INTR, ",intr", "" }, - { NFS_MOUNT_POSIX, ",posix", "" }, + { NFS_MOUNT_INTR, ",intr", ",nointr" }, { NFS_MOUNT_TCP, ",tcp", ",udp" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, - { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { NFS_MOUNT_NONLM, ",nolock", "" }, + { NFS_MOUNT_NOACL, ",noacl", "" }, { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; - struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); - seq_printf(m, ",v%d", nfss->rpc_ops->version); + seq_printf(m, ",vers=%d", nfss->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); seq_printf(m, ",wsize=%d", nfss->wsize); - if (nfss->acregmin != 3*HZ) + if (nfss->acregmin != 3*HZ || showdefaults) seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); - if (nfss->acregmax != 60*HZ) + if (nfss->acregmax != 60*HZ || showdefaults) seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); - if (nfss->acdirmin != 30*HZ) + if (nfss->acdirmin != 30*HZ || showdefaults) seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); - if (nfss->acdirmax != 60*HZ) + if (nfss->acdirmax != 60*HZ || showdefaults) seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); + if (nfss->flags & NFS_MOUNT_TCP) + seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); + seq_printf(m, ",retrans=%u", nfss->retrans_count); + for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { if (nfss->flags & nfs_infop->flag) seq_puts(m, nfs_infop->str); else seq_puts(m, nfs_infop->nostr); } +} + +static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + + nfs_show_mount_options(m, nfss, 0); + seq_puts(m, ",addr="); seq_escape(m, nfss->hostname, " \t\n\\"); + + return 0; +} + +static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) +{ + int i, cpu; + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + struct rpc_auth *auth = nfss->client->cl_auth; + struct nfs_iostats totals = { }; + + seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS); + + /* + * Display all mount option settings + * need ro/rw, sync/async + */ + seq_printf(m, "\n\topts:\t"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); + nfs_show_mount_options(m, nfss, 1); + + seq_printf(m, "\n\tcaps:\t"); + seq_printf(m, "caps=0x%x", nfss->caps); + seq_printf(m, ",wtmult=%d", nfss->wtmult); + seq_printf(m, ",dtsize=%d", nfss->dtsize); + seq_printf(m, ",bsize=%d", nfss->bsize); + seq_printf(m, ",namelen=%d", nfss->namelen); + +#ifdef CONFIG_NFS_V4 + if (nfss->rpc_ops->version == 4) { + seq_printf(m, "\n\tnfsv4:\t"); + seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); + seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); + seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); + } +#endif + + /* + * Display security flavor in effect for this mount + */ + seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); + if (auth->au_flavor) + seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); + + /* + * Display superblock I/O counters + */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + struct nfs_iostats *stats; + + if (!cpu_possible(cpu)) + continue; + + preempt_disable(); + stats = per_cpu_ptr(nfss->io_stats, cpu); + + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + totals.events[i] += stats->events[i]; + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + totals.bytes[i] += stats->bytes[i]; + + preempt_enable(); + } + + seq_printf(m, "\n\tevents:\t"); + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + seq_printf(m, "%lu ", totals.events[i]); + seq_printf(m, "\n\tbytes:\t"); + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + seq_printf(m, "%Lu ", totals.bytes[i]); + return 0; } @@ -585,14 +772,26 @@ nfs_zap_caches(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); int mode = inode->i_mode; + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; +} + +static void nfs_zap_acl_cache(struct inode *inode) +{ + void (*clear_acl_cache)(struct inode *); + + clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; + if (clear_acl_cache != NULL) + clear_acl_cache(inode); + NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL; } /* @@ -689,7 +888,7 @@ nfs_fhget(struct super_block *sb, struct /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ - inode->i_op = &nfs_file_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &nfs_file_operations; inode->i_data.a_ops = &nfs_file_aops; @@ -700,6 +899,11 @@ nfs_fhget(struct super_block *sb, struct if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; + /* Deal with crossing mountpoints */ + if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { + inode->i_op = &nfs_mountpoint_inode_operations; + inode->i_fop = NULL; + } } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; else @@ -787,12 +991,13 @@ nfs_setattr(struct dentry *dentry, struc if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; if ((attr->ia_valid & ATTR_SIZE) != 0) { + nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); inode->i_size = attr->ia_size; vmtruncate(inode, attr->ia_size); } } if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) - NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; nfs_end_data_update(inode); unlock_kernel(); return error; @@ -807,14 +1012,17 @@ nfs_wait_on_inode(struct inode *inode, i { struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_inode *nfsi = NFS_I(inode); + unsigned long start = jiffies; + int error = 0; - int error; - if (!(NFS_FLAGS(inode) & flag)) - return 0; - atomic_inc(&inode->i_count); - error = nfs_wait_event(clnt, nfsi->nfs_i_wait, + if ((NFS_FLAGS(inode) & flag)) { + atomic_inc(&inode->i_count); + error = nfs_wait_event(clnt, nfsi->nfs_i_wait, !(NFS_FLAGS(inode) & flag)); - iput(inode); + nfs_add_stats(inode, NFSIOS_WAITEVENTJIFFIES, (jiffies - start)); + nfs_inc_stats(inode, NFSIOS_WAITEVENT); + iput(inode); + } return error; } @@ -851,7 +1059,7 @@ struct nfs_open_context *alloc_nfs_open_ ctx->state = NULL; ctx->lockowner = current->files; ctx->error = 0; - init_waitqueue_head(&ctx->waitq); + ctx->dir_cookie = 0; } return ctx; } @@ -1015,6 +1223,7 @@ __nfs_revalidate_inode(struct nfs_server goto out; } flags = nfsi->flags; + nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE; /* * We may need to keep the attributes marked as invalid if * we raced with nfs_end_attr_update(). @@ -1022,21 +1231,9 @@ __nfs_revalidate_inode(struct nfs_server if (verifier == nfsi->cache_change_attribute) nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); /* Do the page cache invalidation */ - if (flags & NFS_INO_INVALID_DATA) { - if (S_ISREG(inode->i_mode)) { - if (filemap_fdatawrite(inode->i_mapping) == 0) - filemap_fdatawait(inode->i_mapping); - nfs_wb_all(inode); - } - nfsi->flags &= ~NFS_INO_INVALID_DATA; - invalidate_inode_pages2(inode->i_mapping); - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); - dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", - inode->i_sb->s_id, - (long long)NFS_FILEID(inode)); - /* This ensures we revalidate dentries */ - nfsi->cache_change_attribute++; - } + nfs_revalidate_mapping(inode, inode->i_mapping); + if (flags & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1067,13 +1264,44 @@ int nfs_attribute_timeout(struct inode * */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { + nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) && !nfs_attribute_timeout(inode)) return NFS_STALE(inode) ? -ESTALE : 0; +out_reval: return __nfs_revalidate_inode(server, inode); } /** + * nfs_revalidate_mapping - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + */ +void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (nfsi->flags & NFS_INO_INVALID_DATA) { + nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); + if (S_ISREG(inode->i_mode)) { + if (filemap_fdatawrite(mapping) == 0) + filemap_fdatawait(mapping); + nfs_wb_all(inode); + } + invalidate_inode_pages2(mapping); + nfsi->flags &= ~NFS_INO_INVALID_DATA; + if (S_ISDIR(inode->i_mode)) { + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + /* This ensures we revalidate child dentries */ + nfsi->cache_change_attribute++; + } + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode)); + } +} + +/** * nfs_begin_data_update * @inode - pointer to inode * Declare that a set of operations will update file data on the server @@ -1106,27 +1334,6 @@ void nfs_end_data_update(struct inode *i } /** - * nfs_end_data_update_defer - * @inode - pointer to inode - * Declare end of the operations that will update file data - * This will defer marking the inode as needing revalidation - * unless there are no other pending updates. - */ -void nfs_end_data_update_defer(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - if (atomic_dec_and_test(&nfsi->data_updates)) { - /* Mark the attribute cache for revalidation */ - nfsi->flags |= NFS_INO_INVALID_ATTR; - /* Directories and symlinks: invalidate page cache too */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - nfsi->flags |= NFS_INO_INVALID_DATA; - nfsi->cache_change_attribute ++; - } -} - -/** * nfs_refresh_inode - verify consistency of the inode attribute cache * @inode - pointer to inode * @fattr - updated attributes @@ -1152,8 +1359,11 @@ int nfs_refresh_inode(struct inode *inod if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 && nfsi->change_attr == fattr->pre_change_attr) nfsi->change_attr = fattr->change_attr; - if (!data_unstable && nfsi->change_attr != fattr->change_attr) + if (nfsi->change_attr != fattr->change_attr) { nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!data_unstable) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } } if ((fattr->valid & NFS_ATTR_FATTR) == 0) @@ -1176,18 +1386,22 @@ int nfs_refresh_inode(struct inode *inod } /* Verify a few of the more important attributes */ - if (!data_unstable) { - if (!timespec_equal(&inode->i_mtime, &fattr->mtime) - || cur_size != new_isize) - nfsi->flags |= NFS_INO_INVALID_ATTR; - } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) - nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { + nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!data_unstable) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } + if (cur_size != new_isize) { + nfsi->flags |= NFS_INO_INVALID_ATTR; + if (nfsi->npages == 0) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ if (inode->i_nlink != fattr->nlink) @@ -1214,11 +1428,10 @@ int nfs_refresh_inode(struct inode *inod */ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) { + struct nfs_server *server; struct nfs_inode *nfsi = NFS_I(inode); - __u64 new_size; - loff_t new_isize; + loff_t cur_isize, new_isize; unsigned int invalid = 0; - loff_t cur_isize; int data_unstable; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", @@ -1243,6 +1456,12 @@ static int nfs_update_inode(struct inode if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) goto out_changed; + server = NFS_SERVER(inode); + /* Update the fsid if and only if this is the root directory */ + if (inode == inode->i_sb->s_root->d_inode + && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) + server->fsid = fattr->fsid; + /* * Update the read time so we don't revalidate too often. */ @@ -1251,61 +1470,56 @@ static int nfs_update_inode(struct inode /* Are we racing with known updates of the metadata on the server? */ data_unstable = ! nfs_verify_change_attribute(inode, verifier); - /* Check if the file size agrees */ - new_size = fattr->size; + /* Check if our cached file size is stale */ new_isize = nfs_size_to_loff_t(fattr->size); cur_isize = i_size_read(inode); - if (cur_isize != new_size) { -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif - /* - * If we have pending writebacks, things can get - * messy. - */ - if (S_ISREG(inode->i_mode) && data_unstable) { - if (new_isize > cur_isize) { + if (new_isize != cur_isize) { + /* Do we perhaps have any outstanding writes? */ + if (nfsi->npages == 0) { + /* No, but did we race with nfs_end_data_update()? */ + if (verifier == nfsi->cache_change_attribute) { inode->i_size = new_isize; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + invalid |= NFS_INO_INVALID_DATA; } - } else { + invalid |= NFS_INO_INVALID_ATTR; + } else if (new_isize > cur_isize) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } + dprintk("NFS: isize change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); } - /* - * Note: we don't check inode->i_mtime since pipes etc. - * can change this value in VFS without requiring a - * cache revalidation. - */ + /* Check if the mtime agrees */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif + dprintk("NFS: mtime change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); if (!data_unstable) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } if ((fattr->valid & NFS_ATTR_FATTR_V4) && nfsi->change_attr != fattr->change_attr) { -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n", + dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif nfsi->change_attr = fattr->change_attr; if (!data_unstable) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + /* If ctime has changed we should definitely clear access+acl caches */ + if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { + if (!data_unstable) + invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + } memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; @@ -1325,6 +1539,7 @@ static int nfs_update_inode(struct inode /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { @@ -1360,9 +1575,170 @@ static int nfs_update_inode(struct inode } /* + * nfs_try_migrate_filehandle - Check if we can migrate the inode filehandle + * @inode - pointer to inode + * @fh - the filehandle resulting from lookup() + * @fattr - attributes associated with the new filehandle + * + * Do our very best to update existing inodes when the user wants to migrate + * this filesystem to a replica server. + * + * Note that here be HUGE dragons, with endless possibilities for causing + * trouble... + */ +int nfs_try_migrate_filehandle(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr, uint32_t generation) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + /* Argh! The basic file type has changed */ + if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + goto out_bad; + /* Fileid + filehandle are the same. Good! */ + if (nfs_compare_fh(&nfsi->fh, fh) == 0 && nfsi->fileid == fattr->fileid) + goto out_good; + if (fattr->valid && NFS_ATTR_FATTR_V4) { + /* Do the NFSv4 change attributes match our cached value? */ + if (nfsi->change_attr != fattr->change_attr) + goto out_bad; + } else { + /* Does the ctime match? */ + if (!timespec_equal(&fattr->ctime, &inode->i_ctime)) + goto out_bad; + } + /* Does the file size match? */ + if (nfs_size_to_loff_t(fattr->size) != inode->i_size) + goto out_bad; + /* FIXME: Here lie the biggest dragons: + * Try considering all possible races w.r.t. iget5_locked() + */ + nfs_copy_fh(&nfsi->fh, fh); + if (nfsi->fileid != fattr->fileid) { + /* The very concept of migrating to a new hash bucket + * is so full of holes and races that it defies belief! + */ + remove_inode_hash(inode); + nfsi->fileid = fattr->fileid; + inode->i_ino = nfs_fattr_to_ino_t(fattr); + insert_inode_hash(inode); + } +out_good: + inode->i_generation = generation; + return 0; +out_bad: + return -EIO; +} + +/* + * nfs_try_migrate_inode - Update an inode's filehandle after migration + * @inode - pointer to inode to migrate + * @dentry - pointer to dentry + */ +int nfs_try_migrate_inode(struct inode *inode, struct dentry *dentry) +{ + struct nfs_fh fh; + struct nfs_fattr fattr; + struct dentry *next, *next_parent; + uint32_t generation; + int status; + + if (dentry == NULL) { + status = -ENOENT; + dentry = d_find_alias(inode); + if (dentry == NULL) + goto out; + } else + dget(dentry); +repeat: + /* Has this inode already been revalidated? */ + status = 0; + generation = NFS_SERVER(inode)->generation; + if ((long)generation - (long)inode->i_generation <= 0) + goto out; + /* No. Search for a previously revalidated path element */ + next = dget(dentry); + next_parent = dget_parent(dentry); + while((long)generation - (long)next_parent->d_inode->i_generation > 0) { + BUG_ON(IS_ROOT(next_parent)); + dput(next); + next = next_parent; + next_parent = dget_parent(next); + } + status = NFS_PROTO(inode)->lookup(next_parent->d_inode, &next->d_name, + &fh, &fattr); + if (status == 0) + status = nfs_try_migrate_filehandle(next->d_inode, &fh, &fattr, generation); + switch (status) { + case -ESTALE: + if (IS_ROOT(next_parent)) + break; + case 0: + if (dentry->d_inode == inode) + break; + dput(next_parent); + dput(next); + goto repeat; + default: + d_drop(next); + } + dput(next_parent); + dput(next); +out: + dput(dentry); + dprintk("%s: returned error %d\n", __FUNCTION__, status); + return status; +} + +/* * File system information */ +/* + * nfs_path - reconstruct the path given an arbitrary dentry + * @base - arbitrary string to prepend to the path + * @dentry - pointer to dentry + * @buffer - result buffer + * @buflen - length of buffer + * + * Helper function for constructing the path from the + * root dentry to an arbitrary hashed dentry. + * + * This is mainly for use in figuring out the path on the + * server side when automounting on top of an existing partition. + */ +static char *nfs_path(const char *base, const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + char *end = buffer+buflen; + int namelen; + + *--end = '\0'; + buflen--; + spin_lock(&dcache_lock); + while (!IS_ROOT(dentry)) { + namelen = dentry->d_name.len; + buflen -= namelen + 1; + if (buflen < 0) + goto Elong; + end -= namelen; + memcpy(end, dentry->d_name.name, namelen); + *--end = '/'; + dentry = dentry->d_parent; + } + spin_unlock(&dcache_lock); + namelen = strlen(base); + /* Strip off excess slashes in base string */ + while (namelen > 0 && base[namelen - 1] == '/') + namelen--; + buflen -= namelen; + if (buflen < 0) + goto Elong; + end -= namelen; + memcpy(end, base, namelen); + return end; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + static int nfs_set_super(struct super_block *s, void *data) { s->s_fs_info = data; @@ -1385,74 +1761,94 @@ static struct super_block *nfs_get_sb(st int flags, const char *dev_name, void *raw_data) { int error; - struct nfs_server *server; + struct nfs_server *server = NULL; struct super_block *s; struct nfs_fh *root; struct nfs_mount_data *data = raw_data; - if (!data) { - printk("nfs_read_super: missing data argument\n"); - return ERR_PTR(-EINVAL); + s = ERR_PTR(-EINVAL); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + goto out_err; } + if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); + goto out_err; + } + switch (data->version) { + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: mount structure version %d does not support NFSv3\n", + __FUNCTION__, + data->version); + goto out_err; + } + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) { + dprintk("%s: mount structure version %d does not support strong security\n", + __FUNCTION__, + data->version); + goto out_err; + } + case 5: + memset(data->context, 0, sizeof(data->context)); + } +#ifndef CONFIG_NFS_V3 + /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ + s = ERR_PTR(-EPROTONOSUPPORT); + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); + goto out_err; + } +#endif /* CONFIG_NFS_V3 */ + s = ERR_PTR(-ENOMEM); server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) - return ERR_PTR(-ENOMEM); + goto out_err; memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); - if (data->version != NFS_MOUNT_VERSION) { - printk("nfs warning: mount version %s than kernel\n", - data->version < NFS_MOUNT_VERSION ? "older" : "newer"); - if (data->version < 2) - data->namlen = 0; - if (data->version < 3) - data->bsize = 0; - if (data->version < 4) { - data->flags &= ~NFS_MOUNT_VER3; - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - } - if (data->version < 5) - data->flags &= ~NFS_MOUNT_SECFLAVOUR; - } - root = &server->fh; if (data->flags & NFS_MOUNT_VER3) root->size = data->root.size; else root->size = NFS2_FHSIZE; + s = ERR_PTR(-EINVAL); if (root->size > sizeof(root->data)) { - printk("nfs_get_sb: invalid root filehandle\n"); - kfree(server); - return ERR_PTR(-EINVAL); + dprintk("%s: invalid root filehandle\n", __FUNCTION__); + goto out_err; } memcpy(root->data, data->root.data, root->size); /* We now require that the mount process passes the remote address */ memcpy(&server->addr, &data->addr, sizeof(server->addr)); if (server->addr.sin_addr.s_addr == INADDR_ANY) { - printk("NFS: mount program didn't pass remote address!\n"); - kfree(server); - return ERR_PTR(-EINVAL); + dprintk("%s: mount program didn't pass remote address!\n", + __FUNCTION__); + goto out_err; } - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); - - if (IS_ERR(s) || s->s_root) { - kfree(server); - return s; + /* Fire up rpciod if not yet running */ + s = ERR_PTR(rpciod_up()); + if (IS_ERR(s)) { + dprintk("%s: couldn't start rpciod! Error = %ld\n", + __FUNCTION__, PTR_ERR(s)); + goto out_err; } - s->s_flags = flags; + s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s) || s->s_root) + goto out_rpciod_down; - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - kfree(server); - return ERR_PTR(-EIO); - } + s->s_flags = flags; error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); if (error) { @@ -1462,6 +1858,11 @@ static struct super_block *nfs_get_sb(st } s->s_flags |= MS_ACTIVE; return s; +out_rpciod_down: + rpciod_down(); +out_err: + kfree(server); + return s; } static void nfs_kill_super(struct super_block *s) @@ -1474,6 +1875,8 @@ static void nfs_kill_super(struct super_ rpc_shutdown_client(server->client); if (server->client_sys != NULL && !IS_ERR(server->client_sys)) rpc_shutdown_client(server->client_sys); + if (server->client_acl != NULL && !IS_ERR(server->client_acl)) + rpc_shutdown_client(server->client_acl); if (!(server->flags & NFS_MOUNT_NONLM)) lockd_down(); /* release rpc.lockd */ @@ -1483,6 +1886,7 @@ static void nfs_kill_super(struct super_ if (server->hostname != NULL) kfree(server->hostname); kfree(server); + nfs_release_automount_timer(); } static struct file_system_type nfs_fs_type = { @@ -1507,6 +1911,7 @@ static struct super_operations nfs4_sops .clear_inode = nfs4_clear_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, + .show_stats = nfs_show_stats, }; /* @@ -1518,9 +1923,6 @@ static void nfs4_clear_inode(struct inod { struct nfs_inode *nfsi = NFS_I(inode); - /* If we are holding a delegation, return it! */ - if (nfsi->delegation != NULL) - nfs_inode_return_delegation(inode); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); /* Now clear out any remaining state */ @@ -1538,6 +1940,9 @@ static void nfs4_clear_inode(struct inod BUG_ON(atomic_read(&state->count) != 1); nfs4_close_state(state, state->state); } + /* If we are holding a delegation, return it! */ + if (nfsi->delegation != NULL) + nfs_inode_clear_delegation(inode); } @@ -1592,17 +1997,24 @@ static int nfs4_fill_super(struct super_ return -EINVAL; } + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + clp = nfs4_get_client(&server->addr.sin_addr); if (!clp) { - printk(KERN_WARNING "NFS: failed to create NFS4 client.\n"); + dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); return -EIO; } /* Now create transport and client */ authflavour = RPC_AUTH_UNIX; if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen > 1) - printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n"); + if (data->auth_flavourlen != 1) { + dprintk("%s: Invalid number of RPC auth flavours %d.\n", + __FUNCTION__, data->auth_flavourlen); + err = -EINVAL; + goto out_fail; + } if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { err = -EFAULT; goto out_fail; @@ -1614,17 +2026,18 @@ static int nfs4_fill_super(struct super_ xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { up_write(&clp->cl_sem); - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); err = PTR_ERR(xprt); + dprintk("%s: cannot create RPC transport. Error = %d\n", + __FUNCTION__, err); goto out_fail; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, authflavour); if (IS_ERR(clnt)) { up_write(&clp->cl_sem); - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - xprt_destroy(xprt); err = PTR_ERR(clnt); + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); goto out_fail; } clnt->cl_intr = 1; @@ -1656,21 +2069,26 @@ static int nfs4_fill_super(struct super_ clp = NULL; if (IS_ERR(clnt)) { - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - return PTR_ERR(clnt); + err = PTR_ERR(clnt); + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); + return err; } server->client = clnt; if (server->nfs4_state->cl_idmap == NULL) { - printk(KERN_WARNING "NFS: failed to create idmapper.\n"); + dprintk("%s: failed to create idmapper.\n", __FUNCTION__); return -ENOMEM; } if (clnt->cl_auth->au_flavor != authflavour) { - if (rpcauth_create(authflavour, clnt) == NULL) { - printk(KERN_WARNING "NFS: couldn't create credcache!\n"); - return -ENOMEM; + struct rpc_auth *auth; + + auth = rpcauth_create(authflavour, clnt); + if (IS_ERR(auth)) { + dprintk("%s: couldn't create credcache!\n", __FUNCTION__); + return PTR_ERR(auth); } } @@ -1730,8 +2148,12 @@ static struct super_block *nfs4_get_sb(s struct nfs4_mount_data *data = raw_data; void *p; - if (!data) { - printk("nfs_read_super: missing data argument\n"); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + return ERR_PTR(-EINVAL); + } + if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); return ERR_PTR(-EINVAL); } @@ -1742,11 +2164,6 @@ static struct super_block *nfs4_get_sb(s /* Zero out the NFS state stuff */ init_nfsv4_state(server); - if (data->version != NFS4_MOUNT_VERSION) { - printk("nfs warning: mount version %s than kernel\n", - data->version < NFS4_MOUNT_VERSION ? "older" : "newer"); - } - p = nfs_copy_user_string(NULL, &data->hostname, 256); if (IS_ERR(p)) goto out_err; @@ -1773,11 +2190,20 @@ static struct super_block *nfs4_get_sb(s } if (server->addr.sin_family != AF_INET || server->addr.sin_addr.s_addr == INADDR_ANY) { - printk("NFS: mount program didn't pass remote IP address!\n"); + dprintk("%s: mount program didn't pass remote IP address!\n", + __FUNCTION__); s = ERR_PTR(-EINVAL); goto out_free; } + /* Fire up rpciod if not yet running */ + s = ERR_PTR(rpciod_up()); + if (IS_ERR(s)) { + dprintk("%s: couldn't start rpciod! Error = %ld\n", + __FUNCTION__, PTR_ERR(s)); + goto out_free; + } + s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); if (IS_ERR(s) || s->s_root) @@ -1785,13 +2211,6 @@ static struct super_block *nfs4_get_sb(s s->s_flags = flags; - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - s = ERR_PTR(-EIO); - goto out_free; - } - error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); if (error) { up_write(&s->s_umount); @@ -1807,6 +2226,7 @@ out_free: kfree(server->mnt_path); if (server->hostname) kfree(server->hostname); + nfs_free_iostats(server->io_stats); kfree(server); return s; } @@ -1829,6 +2249,7 @@ static void nfs4_kill_super(struct super if (server->hostname != NULL) kfree(server->hostname); kfree(server); + nfs_release_automount_timer(); } static struct file_system_type nfs4_fs_type = { @@ -1839,6 +2260,59 @@ static struct file_system_type nfs4_fs_t .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +/* Constructs the SERVER-side path */ +static inline char *nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen) +{ + return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen); +} + +static inline char *nfs4_dup_path(const struct dentry *dentry) +{ + char *page = (char *) __get_free_page(GFP_USER); + char *path; + + path = nfs4_path(dentry, page, PAGE_SIZE); + if (!IS_ERR(path)) { + int len = PAGE_SIZE + page - path; + char *tmp = path; + + path = kmalloc(len, GFP_KERNEL); + if (path) + memcpy(path, tmp, len); + else + path = ERR_PTR(-ENOMEM); + } + free_page((unsigned long)page); + return path; +} + +static struct super_block *nfs4_clone_client(struct nfs_server *server, const struct dentry *dentry) +{ + struct nfs4_client *clp = server->nfs4_state; + struct super_block *sb; + + server->mnt_path = nfs4_dup_path(dentry); + if (IS_ERR(server->mnt_path)) { + sb = (struct super_block *)server->mnt_path; + goto err; + } + sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); + if (IS_ERR(sb) || sb->s_root) + goto free_path; + nfs4_server_capabilities(server, &server->fh); + + down_write(&clp->cl_sem); + atomic_inc(&clp->cl_count); + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + up_write(&clp->cl_sem); + return sb; +free_path: + kfree(server->mnt_path); +err: + server->mnt_path = NULL; + return sb; +} + #define nfs4_init_once(nfsi) \ do { \ INIT_LIST_HEAD(&(nfsi)->open_states); \ @@ -1849,12 +2323,175 @@ static struct file_system_type nfs4_fs_t #define register_nfs4fs() register_filesystem(&nfs4_fs_type) #define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type) #else +#define nfs4_clone_client(a,b) ERR_PTR(-EINVAL) #define nfs4_init_once(nfsi) \ do { } while (0) #define register_nfs4fs() (0) #define unregister_nfs4fs() #endif +static inline struct super_block *nfs_clone_client(struct nfs_server *server) +{ + struct super_block *sb; + + sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM)) + lockd_up(); + return sb; +} + +struct nfs_clone_mount { + const struct super_block *sb; + const struct dentry *dentry; + struct nfs_fh *fh; + struct nfs_fattr *fattr; +}; + +static struct super_block *clone_nfs_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + struct nfs_clone_mount *data = raw_data; + struct nfs_server *server; + struct nfs_server *parent = NFS_SB(data->sb); + struct super_block *sb = ERR_PTR(-EINVAL); + void *err = ERR_PTR(-ENOMEM); + struct inode *root_inode; + struct nfs_fsinfo fsinfo; + int len; + + server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (server == NULL) + goto out_err; + memcpy(server, parent, sizeof(*server)); + len = strlen(parent->hostname) + 1; + server->hostname = kmalloc(len, GFP_KERNEL); + if (server->hostname == NULL) + goto free_server; + memcpy(server->hostname, parent->hostname, len); + server->fsid = data->fattr->fsid; + nfs_copy_fh(&server->fh, data->fh); + server->io_stats = nfs_alloc_iostats(); + if (server->io_stats == NULL) + goto free_hostname; + if (rpciod_up() != 0) + goto free_iostats; + + switch (parent->rpc_ops->version) { + case 2: + case 3: + sb = nfs_clone_client(server); + break; + case 4: + sb = nfs4_clone_client(server, data->dentry); + break; + default: + BUG(); + } + if (IS_ERR((err = sb)) || sb->s_root) + goto kill_rpciod; + sb->s_op = data->sb->s_op; + sb->s_blocksize = data->sb->s_blocksize; + sb->s_blocksize_bits = data->sb->s_blocksize_bits; + sb->s_maxbytes = data->sb->s_maxbytes; + + server->client_acl = NULL; + server->client_sys = NULL; + server->client = rpc_clone_client(parent->client); + if (IS_ERR((err = server->client))) + goto out_deactivate; + if (parent->client_sys != NULL) { + server->client_sys = rpc_clone_client(parent->client_sys); + if (IS_ERR((err = server->client_sys))) + goto out_deactivate; + } + if (parent->client_acl != NULL) { + server->client_acl = rpc_clone_client(parent->client_acl); + if (IS_ERR((err = server->client_acl))) + goto out_deactivate; + } + root_inode = nfs_fhget(sb, data->fh, data->fattr); + if (!root_inode) + goto out_deactivate; + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) + goto out_put_root; + fsinfo.fattr = data->fattr; + if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) + nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); + sb->s_root->d_op = server->rpc_ops->dentry_ops; + sb->s_flags |= MS_ACTIVE; + return sb; +out_put_root: + iput(root_inode); +out_deactivate: + up_write(&sb->s_umount); + deactivate_super(sb); + return (struct super_block *)err; +kill_rpciod: + rpciod_down(); +free_iostats: + nfs_free_iostats(server->io_stats); +free_hostname: + kfree(server->hostname); +free_server: + kfree(server); +out_err: + return (struct super_block *)err; +} + +static struct file_system_type clone_nfs_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .get_sb = clone_nfs_sb, + .kill_sb = nfs_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static inline char *nfs_devname(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen); +} + +/** + * nfs_do_submount - set up mountpoint when crossing a filesystem boundary + * @mnt_parent - mountpoint of parent directory + * @dentry - parent directory + * @fh - filehandle for new root dentry + * @fattr - attributes for new root inode + * + */ +struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, + const struct dentry *dentry, struct nfs_fh *fh, + struct nfs_fattr *fattr) +{ + struct nfs_clone_mount mountdata = { + .sb = mnt_parent->mnt_sb, + .dentry = dentry, + .fh = fh, + .fattr = fattr, + }; + struct vfsmount *mnt = ERR_PTR(-ENOMEM); + char *page = (char *) __get_free_page(GFP_USER); + char *devname; + + dprintk("%s: submounting on %s/%s\n", __FUNCTION__, + dentry->d_parent->d_name.name, + dentry->d_name.name); + if (page == NULL) + goto out; + devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); + if (!IS_ERR(devname)) + mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, &mountdata); + else + mnt = (struct vfsmount *)devname; + free_page((unsigned long)page); +out: + dprintk("%s: done\n", __FUNCTION__); + return mnt; +} + extern int nfs_init_nfspagecache(void); extern void nfs_destroy_nfspagecache(void); extern int nfs_init_readpagecache(void); @@ -1875,6 +2512,13 @@ static struct inode *nfs_alloc_inode(str if (!nfsi) return NULL; nfsi->flags = 0; +#ifdef CONFIG_NFS_V3_ACL + nfsi->acl_access = ERR_PTR(-EAGAIN); + nfsi->acl_default = ERR_PTR(-EAGAIN); +#endif +#ifdef CONFIG_NFS_V4 + nfsi->nfs4_acl = NULL; +#endif /* CONFIG_NFS_V4 */ return &nfsi->vfs_inode; } @@ -1929,6 +2573,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfs_init_sysctl(); + if (err) + goto out5; + err = nfs_init_nfspagecache(); if (err) goto out4; @@ -1976,6 +2624,8 @@ out2: out3: nfs_destroy_nfspagecache(); out4: + nfs_destroy_sysctl(); +out5: return err; } @@ -1991,6 +2641,7 @@ static void __exit exit_nfs_fs(void) #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif + nfs_destroy_sysctl(); unregister_filesystem(&nfs_fs_type); unregister_nfs4fs(); } Index: linux-2.6.12-rc3/fs/nfs/mount_clnt.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/mount_clnt.c +++ linux-2.6.12-rc3/fs/nfs/mount_clnt.c @@ -80,9 +80,7 @@ mnt_create(char *hostname, struct sockad clnt = rpc_create_client(xprt, hostname, &mnt_program, version, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) { - xprt_destroy(xprt); - } else { + if (!IS_ERR(clnt)) { clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; Index: linux-2.6.12-rc3/fs/nfs/namespace.c =================================================================== --- /dev/null +++ linux-2.6.12-rc3/fs/nfs/namespace.c @@ -0,0 +1,111 @@ +/* + * linux/fs/nfs/namespace.c + * + * Copyright (C) 2005 Trond Myklebust + * + * NFS namespace + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#define NFSDBG_FACILITY NFSDBG_VFS + +static LIST_HEAD(nfs_automount_list); +static void nfs_expire_automounts(void *list); +static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list); +int nfs_mountpoint_expiry_timeout = 500 * HZ; + +/* + * nfs_follow_mountpoint - handle crossing a mountpoint on the server + * @dentry - dentry of mountpoint + * @nd - nameidata info + * + * When we encounter a mountpoint on the server, we want to set up + * a mountpoint on the client too, to prevent inode numbers from + * colliding, and to allow "df" to work properly. + * On NFSv4, we also want to allow for the fact that different + * filesystems may be migrated to different servers in a failover + * situation, and that different filesystems may want to use + * different security flavours. + */ +static int nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) +{ + struct vfsmount *mnt; + struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct dentry *parent; + struct nfs_fh fh; + struct nfs_fattr fattr; + int err; + + BUG_ON(IS_ROOT(dentry)); + dprintk("%s: enter\n", __FUNCTION__); + dput(nd->dentry); + nd->dentry = dget(dentry); + if (d_mountpoint(nd->dentry)) + goto out_follow; + /* Look it up again */ + parent = dget_parent(nd->dentry); + err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr); + dput(parent); + if (err != 0) + goto out_err; + mnt = nfs_do_submount(nd->mnt, nd->dentry, &fh, &fattr); + if (IS_ERR(mnt)) { + err = PTR_ERR(mnt); + goto out_err; + } + mntget(mnt); + err = do_add_mount(mnt, nd, nd->mnt->mnt_flags, &nfs_automount_list); + if (err < 0) { + mntput(mnt); + if (err == -EBUSY) + goto out_follow; + goto out_err; + } + mntput(nd->mnt); + dput(nd->dentry); + nd->mnt = mnt; + nd->dentry = dget(mnt->mnt_root); + schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); +out: + dprintk("%s: done, returned %d\n", __FUNCTION__, err); + return err; +out_err: + path_release(nd); + goto out; +out_follow: + while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) + ; + err = 0; + goto out; +} + +struct inode_operations nfs_mountpoint_inode_operations = { + .follow_link = nfs_follow_mountpoint, + .getattr = nfs_getattr, +}; + +static void nfs_expire_automounts(void *data) +{ + struct list_head *list = (struct list_head *)data; + + mark_mounts_for_expiry(list); + if (!list_empty(list)) + schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); +} + +void nfs_release_automount_timer(void) +{ + if (list_empty(&nfs_automount_list)) { + cancel_delayed_work(&nfs_automount_task); + flush_scheduled_work(); + } +} Index: linux-2.6.12-rc3/fs/nfs/nfs2xdr.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/nfs2xdr.c +++ linux-2.6.12-rc3/fs/nfs/nfs2xdr.c @@ -131,7 +131,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt fattr->du.nfs2.blocksize = ntohl(*p++); rdev = ntohl(*p++); fattr->du.nfs2.blocks = ntohl(*p++); - fattr->fsid_u.nfs3 = ntohl(*p++); + fattr->fsid.major = ntohl(*p++); + fattr->fsid.minor = 0; fattr->fileid = ntohl(*p++); p = xdr_decode_time(p, &fattr->atime); p = xdr_decode_time(p, &fattr->mtime); Index: linux-2.6.12-rc3/fs/nfs/nfs3acl.c =================================================================== --- /dev/null +++ linux-2.6.12-rc3/fs/nfs/nfs3acl.c @@ -0,0 +1,403 @@ +#include +#include +#include +#include +#include +#include + +#define NFSDBG_FACILITY NFSDBG_PROC + +ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int pos=0, len=0; + +# define output(s) do { \ + if (pos + sizeof(s) <= size) { \ + memcpy(buffer + pos, s, sizeof(s)); \ + pos += sizeof(s); \ + } \ + len += sizeof(s); \ + } while(0) + + acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + output("system.posix_acl_access"); + posix_acl_release(acl); + } + + if (S_ISDIR(inode->i_mode)) { + acl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + output("system.posix_acl_default"); + posix_acl_release(acl); + } + } + +# undef output + + if (!buffer || len <= size) + return len; + return -ERANGE; +} + +ssize_t nfs3_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int type, error = 0; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + acl = nfs3_proc_getacl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + if (type == ACL_TYPE_ACCESS && acl->a_count == 0) + error = -ENODATA; + else + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + } else + error = -ENODATA; + + return error; +} + +int nfs3_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int type, error; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + error = nfs3_proc_setacl(inode, type, acl); + posix_acl_release(acl); + + return error; +} + +int nfs3_removexattr(struct dentry *dentry, const char *name) +{ + struct inode *inode = dentry->d_inode; + int type; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + return nfs3_proc_setacl(inode, type, NULL); +} + +static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi) +{ + if (!IS_ERR(nfsi->acl_access)) { + posix_acl_release(nfsi->acl_access); + nfsi->acl_access = ERR_PTR(-EAGAIN); + } + if (!IS_ERR(nfsi->acl_default)) { + posix_acl_release(nfsi->acl_default); + nfsi->acl_default = ERR_PTR(-EAGAIN); + } +} + +void nfs3_forget_cached_acls(struct inode *inode) +{ + dprintk("NFS: nfs3_forget_cached_acls(%s/%ld)\n", inode->i_sb->s_id, + inode->i_ino); + spin_lock(&inode->i_lock); + __nfs3_forget_cached_acls(NFS_I(inode)); + spin_unlock(&inode->i_lock); +} + +static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct posix_acl *acl = ERR_PTR(-EINVAL); + + spin_lock(&inode->i_lock); + switch(type) { + case ACL_TYPE_ACCESS: + acl = nfsi->acl_access; + break; + + case ACL_TYPE_DEFAULT: + acl = nfsi->acl_default; + break; + + default: + goto out; + } + if (IS_ERR(acl)) + acl = ERR_PTR(-EAGAIN); + else + acl = posix_acl_dup(acl); +out: + spin_unlock(&inode->i_lock); + dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id, + inode->i_ino, type, acl); + return acl; +} + +static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + dprintk("nfs3_cache_acls(%s/%ld, %p, %p)\n", inode->i_sb->s_id, + inode->i_ino, acl, dfacl); + spin_lock(&inode->i_lock); + __nfs3_forget_cached_acls(NFS_I(inode)); + nfsi->acl_access = posix_acl_dup(acl); + nfsi->acl_default = posix_acl_dup(dfacl); + spin_unlock(&inode->i_lock); +} + +struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct page *pages[NFSACL_MAXPAGES] = { }; + struct nfs3_getaclargs args = { + .fh = NFS_FH(inode), + /* The xdr layer may allocate pages here. */ + .pages = pages, + }; + struct nfs3_getaclres res = { + .fattr = &fattr, + }; + struct posix_acl *acl; + int status, count; + + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) + return ERR_PTR(-EOPNOTSUPP); + + status = nfs_revalidate_inode(server, inode); + if (status < 0) + return ERR_PTR(status); + acl = nfs3_get_cached_acl(inode, type); + if (acl != ERR_PTR(-EAGAIN)) + return acl; + acl = NULL; + + /* + * Only get the access acl when explicitly requested: We don't + * need it for access decisions, and only some applications use + * it. Applications which request the access acl first are not + * penalized from this optimization. + */ + if (type == ACL_TYPE_ACCESS) + args.mask |= NFS_ACLCNT|NFS_ACL; + if (S_ISDIR(inode->i_mode)) + args.mask |= NFS_DFACLCNT|NFS_DFACL; + if (args.mask == 0) + return NULL; + + dprintk("NFS call getacl\n"); + status = rpc_call(server->client_acl, ACLPROC3_GETACL, + &args, &res, 0); + dprintk("NFS reply getacl: %d\n", status); + + /* pages may have been allocated at the xdr layer. */ + for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++) + __free_page(args.pages[count]); + + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + dprintk("NFS_V3_ACL extension not supported; disabling\n"); + server->caps &= ~NFS_CAP_ACLS; + case -ENOTSUPP: + status = -EOPNOTSUPP; + default: + goto getout; + } + if ((args.mask & res.mask) != args.mask) { + status = -EIO; + goto getout; + } + + if (res.acl_access != NULL) { + if (posix_acl_equiv_mode(res.acl_access, NULL) == 0) { + posix_acl_release(res.acl_access); + res.acl_access = NULL; + } + } + nfs3_cache_acls(inode, res.acl_access, res.acl_default); + + switch(type) { + case ACL_TYPE_ACCESS: + acl = res.acl_access; + res.acl_access = NULL; + break; + + case ACL_TYPE_DEFAULT: + acl = res.acl_default; + res.acl_default = NULL; + } + +getout: + posix_acl_release(res.acl_access); + posix_acl_release(res.acl_default); + + if (status != 0) { + posix_acl_release(acl); + acl = ERR_PTR(status); + } + return acl; +} + +static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct page *pages[NFSACL_MAXPAGES] = { }; + struct nfs3_setaclargs args = { + .inode = inode, + .mask = NFS_ACL, + .acl_access = acl, + .pages = pages, + }; + int status, count; + + status = -EOPNOTSUPP; + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) + goto out; + + /* We are doing this here, because XDR marshalling can only + return -ENOMEM. */ + status = -ENOSPC; + if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES) + goto out; + if (dfacl != NULL && dfacl->a_count > NFS_ACL_MAX_ENTRIES) + goto out; + if (S_ISDIR(inode->i_mode)) { + args.mask |= NFS_DFACL; + args.acl_default = dfacl; + } + + dprintk("NFS call setacl\n"); + nfs_begin_data_update(inode); + status = rpc_call(server->client_acl, ACLPROC3_SETACL, + &args, &fattr, 0); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + nfs_end_data_update(inode); + dprintk("NFS reply setacl: %d\n", status); + + /* pages may have been allocated at the xdr layer. */ + for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++) + __free_page(args.pages[count]); + + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + dprintk("NFS_V3_ACL SETACL RPC not supported" + "(will not retry)\n"); + server->caps &= ~NFS_CAP_ACLS; + case -ENOTSUPP: + status = -EOPNOTSUPP; + } +out: + return status; +} + +int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct posix_acl *alloc = NULL, *dfacl = NULL; + int status; + + if (S_ISDIR(inode->i_mode)) { + switch(type) { + case ACL_TYPE_ACCESS: + alloc = dfacl = nfs3_proc_getacl(inode, + ACL_TYPE_DEFAULT); + if (IS_ERR(alloc)) + goto fail; + break; + + case ACL_TYPE_DEFAULT: + dfacl = acl; + alloc = acl = nfs3_proc_getacl(inode, + ACL_TYPE_ACCESS); + if (IS_ERR(alloc)) + goto fail; + break; + + default: + return -EINVAL; + } + } else if (type != ACL_TYPE_ACCESS) + return -EINVAL; + + if (acl == NULL) { + alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(alloc)) + goto fail; + } + status = nfs3_proc_setacls(inode, acl, dfacl); + posix_acl_release(alloc); + return status; + +fail: + return PTR_ERR(alloc); +} + +int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, + mode_t mode) +{ + struct posix_acl *dfacl, *acl; + int error = 0; + + dfacl = nfs3_proc_getacl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(dfacl)) { + error = PTR_ERR(dfacl); + return (error == -EOPNOTSUPP) ? 0 : error; + } + if (!dfacl) + return 0; + acl = posix_acl_clone(dfacl, GFP_KERNEL); + error = -ENOMEM; + if (!acl) + goto out_release_dfacl; + error = posix_acl_create_masq(acl, &mode); + if (error < 0) + goto out_release_acl; + error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ? + dfacl : NULL); +out_release_acl: + posix_acl_release(acl); +out_release_dfacl: + posix_acl_release(dfacl); + return error; +} Index: linux-2.6.12-rc3/fs/nfs/nfs3proc.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/nfs3proc.c +++ linux-2.6.12-rc3/fs/nfs/nfs3proc.c @@ -17,6 +17,7 @@ #include #include #include +#include #define NFSDBG_FACILITY NFSDBG_PROC @@ -45,7 +46,7 @@ static inline int nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) { struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[proc], + .rpc_proc = &clnt->cl_procinfo[proc], .rpc_argp = argp, .rpc_resp = resp, }; @@ -297,7 +298,7 @@ static int nfs3_proc_commit(struct nfs_w */ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs_fh fhandle; struct nfs_fattr fattr; @@ -313,7 +314,8 @@ nfs3_proc_create(struct inode *dir, stru .fh = &fhandle, .fattr = &fattr }; - int status; + mode_t mode = sattr->ia_mode; + int status; dprintk("NFS call create %s\n", dentry->d_name.name); arg.createmode = NFS3_CREATE_UNCHECKED; @@ -323,6 +325,8 @@ nfs3_proc_create(struct inode *dir, stru arg.verifier[1] = current->pid; } + sattr->ia_mode &= ~current->fs->umask; + again: dir_attr.valid = 0; fattr.valid = 0; @@ -369,6 +373,9 @@ again: nfs_refresh_inode(dentry->d_inode, &fattr); dprintk("NFS reply setattr (post-create): %d\n", status); } + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); out: dprintk("NFS reply create: %d\n", status); return status; @@ -538,15 +545,24 @@ nfs3_proc_mkdir(struct inode *dir, struc .fh = &fhandle, .fattr = &fattr }; - int status; + int mode = sattr->ia_mode; + int status; dprintk("NFS call mkdir %s\n", dentry->d_name.name); dir_attr.valid = 0; fattr.valid = 0; + + sattr->ia_mode &= ~current->fs->umask; + status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); - if (status == 0) - status = nfs_instantiate(dentry, &fhandle, &fattr); + if (status != 0) + goto out; + status = nfs_instantiate(dentry, &fhandle, &fattr); + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: dprintk("NFS reply mkdir: %d\n", status); return status; } @@ -641,6 +657,7 @@ nfs3_proc_mknod(struct inode *dir, struc .fh = &fh, .fattr = &fattr }; + mode_t mode = sattr->ia_mode; int status; switch (sattr->ia_mode & S_IFMT) { @@ -653,12 +670,20 @@ nfs3_proc_mknod(struct inode *dir, struc dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, MAJOR(rdev), MINOR(rdev)); + + sattr->ia_mode &= ~current->fs->umask; + dir_attr.valid = 0; fattr.valid = 0; status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); - if (status == 0) - status = nfs_instantiate(dentry, &fh, &fattr); + if (status != 0) + goto out; + status = nfs_instantiate(dentry, &fh, &fattr); + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: dprintk("NFS reply mknod: %d\n", status); return status; } @@ -825,7 +850,8 @@ nfs3_proc_lock(struct file *filp, int cm struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, - .dir_inode_ops = &nfs_dir_inode_operations, + .dir_inode_ops = &nfs3_dir_inode_operations, + .file_inode_ops = &nfs3_file_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, @@ -856,4 +882,5 @@ struct nfs_rpc_ops nfs_v3_clientops = { .file_open = nfs_open, .file_release = nfs_release, .lock = nfs3_proc_lock, + .clear_acl_cache = nfs3_forget_cached_acls, }; Index: linux-2.6.12-rc3/fs/nfs/nfs3xdr.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/nfs3xdr.c +++ linux-2.6.12-rc3/fs/nfs/nfs3xdr.c @@ -21,6 +21,7 @@ #include #include #include +#include #define NFSDBG_FACILITY NFSDBG_XDR @@ -79,6 +80,11 @@ extern int nfs_stat_to_errno(int); #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6) #define NFS3_commitres_sz (1+NFS3_wcc_data_sz+2) +#define ACL3_getaclargs_sz (NFS3_fh_sz+1) +#define ACL3_setaclargs_sz (NFS3_fh_sz+1+2*(2+5*3)) +#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+2*(2+5*3)) +#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) + /* * Map file type to S_IFMT bits */ @@ -160,7 +166,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor) fattr->rdev = 0; - p = xdr_decode_hyper(p, &fattr->fsid_u.nfs3); + p = xdr_decode_hyper(p, &fattr->fsid.major); + fattr->fsid.minor = 0; p = xdr_decode_hyper(p, &fattr->fileid); p = xdr_decode_time3(p, &fattr->atime); p = xdr_decode_time3(p, &fattr->mtime); @@ -627,6 +634,74 @@ nfs3_xdr_commitargs(struct rpc_rqst *req return 0; } +#ifdef CONFIG_NFS_V3_ACL +/* + * Encode GETACL arguments + */ +static int +nfs3_xdr_getaclargs(struct rpc_rqst *req, u32 *p, + struct nfs3_getaclargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->mask); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + if (args->mask & (NFS_ACL | NFS_DFACL)) { + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + + ACL3_getaclres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, + NFSACL_MAXPAGES << PAGE_SHIFT); + } + return 0; +} + +/* + * Encode SETACL arguments + */ +static int +nfs3_xdr_setaclargs(struct rpc_rqst *req, u32 *p, + struct nfs3_setaclargs *args) +{ + struct xdr_buf *buf = &req->rq_snd_buf; + unsigned int base, len_in_head, len = nfsacl_size( + (args->mask & NFS_ACL) ? args->acl_access : NULL, + (args->mask & NFS_DFACL) ? args->acl_default : NULL); + int count, err; + + p = xdr_encode_fhandle(p, NFS_FH(args->inode)); + *p++ = htonl(args->mask); + base = (char *)p - (char *)buf->head->iov_base; + /* put as much of the acls into head as possible. */ + len_in_head = min_t(unsigned int, buf->head->iov_len - base, len); + len -= len_in_head; + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + len_in_head); + + for (count = 0; (count << PAGE_SHIFT) < len; count++) { + args->pages[count] = alloc_page(GFP_KERNEL); + if (!args->pages[count]) { + while (count) + __free_page(args->pages[--count]); + return -ENOMEM; + } + } + xdr_encode_pages(buf, args->pages, 0, len); + + err = nfsacl_encode(buf, base, args->inode, + (args->mask & NFS_ACL) ? + args->acl_access : NULL, 1, 0); + if (err > 0) + err = nfsacl_encode(buf, base + err, args->inode, + (args->mask & NFS_DFACL) ? + args->acl_default : NULL, 1, + NFS_ACL_DEFAULT); + return (err > 0) ? 0 : err; +} +#endif /* CONFIG_NFS_V3_ACL */ + /* * NFS XDR decode functions */ @@ -978,6 +1053,54 @@ nfs3_xdr_commitres(struct rpc_rqst *req, return 0; } +#ifdef CONFIG_NFS_V3_ACL +/* + * Decode GETACL reply + */ +static int +nfs3_xdr_getaclres(struct rpc_rqst *req, u32 *p, + struct nfs3_getaclres *res) +{ + struct xdr_buf *buf = &req->rq_rcv_buf; + int status = ntohl(*p++); + struct posix_acl **acl; + unsigned int *aclcnt; + int err, base; + + if (status != 0) + return -nfs_stat_to_errno(status); + p = xdr_decode_post_op_attr(p, res->fattr); + res->mask = ntohl(*p++); + if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + return -EINVAL; + base = (char *)p - (char *)req->rq_rcv_buf.head->iov_base; + + acl = (res->mask & NFS_ACL) ? &res->acl_access : NULL; + aclcnt = (res->mask & NFS_ACLCNT) ? &res->acl_access_count : NULL; + err = nfsacl_decode(buf, base, aclcnt, acl); + + acl = (res->mask & NFS_DFACL) ? &res->acl_default : NULL; + aclcnt = (res->mask & NFS_DFACLCNT) ? &res->acl_default_count : NULL; + if (err > 0) + err = nfsacl_decode(buf, base + err, aclcnt, acl); + return (err > 0) ? 0 : err; +} + +/* + * Decode setacl reply. + */ +static int +nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) +{ + int status = ntohl(*p++); + + if (status) + return -nfs_stat_to_errno(status); + xdr_decode_post_op_attr(p, fattr); + return 0; +} +#endif /* CONFIG_NFS_V3_ACL */ + #ifndef MAX # define MAX(a, b) (((a) > (b))? (a) : (b)) #endif @@ -1021,3 +1144,28 @@ struct rpc_version nfs_version3 = { .procs = nfs3_procedures }; +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_procinfo nfs3_acl_procedures[] = { + [ACLPROC3_GETACL] = { + .p_proc = ACLPROC3_GETACL, + .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs, + .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, + .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, + .p_timer = 1, + }, + [ACLPROC3_SETACL] = { + .p_proc = ACLPROC3_SETACL, + .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs, + .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, + .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2, + .p_timer = 0, + }, +}; + +struct rpc_version nfsacl_version3 = { + .number = 3, + .nrprocs = sizeof(nfs3_acl_procedures)/ + sizeof(nfs3_acl_procedures[0]), + .procs = nfs3_acl_procedures, +}; +#endif /* CONFIG_NFS_V3_ACL */ Index: linux-2.6.12-rc3/fs/nfs/nfs4_fs.h =================================================================== --- /dev/null +++ linux-2.6.12-rc3/fs/nfs/nfs4_fs.h @@ -0,0 +1,271 @@ +/* + * linux/fs/nfs/nfs4_fs.h + * + * Copyright (C) 2005 Trond Myklebust + * + * NFSv4-specific filesystem definitions and declarations + */ + +#ifndef __LINUX_FS_NFS_NFS4_FS_H +#define __LINUX_FS_NFS_NFS4_FS_H + +#ifdef CONFIG_NFS_V4 + +#include + +struct idmap; + +/* + * In a seqid-mutating op, this macro controls which error return + * values trigger incrementation of the seqid. + * + * from rfc 3010: + * The client MUST monotonically increment the sequence number for the + * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE + * operations. This is true even in the event that the previous + * operation that used the sequence number received an error. The only + * exception to this rule is if the previous operation received one of + * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID, + * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR, + * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE. + * + */ +#define seqid_mutating_err(err) \ +(((err) != NFSERR_STALE_CLIENTID) && \ + ((err) != NFSERR_STALE_STATEID) && \ + ((err) != NFSERR_BAD_STATEID) && \ + ((err) != NFSERR_BAD_SEQID) && \ + ((err) != NFSERR_BAD_XDR) && \ + ((err) != NFSERR_RESOURCE) && \ + ((err) != NFSERR_NOFILEHANDLE)) + +enum nfs4_client_state { + NFS4CLNT_OK = 0, +}; + +/* + * The nfs4_client identifies our client state to the server. + */ +struct nfs4_client { + struct list_head cl_servers; /* Global list of servers */ + struct in_addr cl_addr; /* Server identifier */ + u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; + unsigned long cl_state; + + u32 cl_lockowner_id; + + /* + * The following rwsem ensures exclusive access to the server + * while we recover the state following a lease expiration. + */ + struct rw_semaphore cl_sem; + + struct list_head cl_delegations; + struct list_head cl_state_owners; + struct list_head cl_unused; + int cl_nunused; + spinlock_t cl_lock; + atomic_t cl_count; + + struct rpc_clnt * cl_rpcclient; + struct rpc_cred * cl_cred; + + struct list_head cl_superblocks; /* List of nfs_server structs */ + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + struct work_struct cl_recoverd; + + wait_queue_head_t cl_waitq; + struct rpc_wait_queue cl_rpcwaitq; + + /* used for the setclientid verifier */ + struct timespec cl_boot_time; + + /* idmapper */ + struct idmap * cl_idmap; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. + */ + char cl_ipaddr[16]; + unsigned char cl_id_uniquifier; +}; + +/* + * NFS4 state_owners and lock_owners are simply labels for ordered + * sequences of RPC calls. Their sole purpose is to provide once-only + * semantics by allowing the server to identify replayed requests. + * + * The ->so_iosem is held during all state_owner seqid-mutating operations: + * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize + * so_seqid. + */ +struct nfs4_state_owner { + struct list_head so_list; /* per-clientid list of state_owners */ + struct nfs4_client *so_client; + u32 so_id; /* 32-bit identifier, unique */ + struct iosem so_iosem; + u32 so_seqid; /* protected by so_iosem */ + atomic_t so_count; + + struct rpc_cred *so_cred; /* Associated cred */ + struct list_head so_states; + struct list_head so_delegations; +}; + +/* + * struct nfs4_state maintains the client-side state for a given + * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). + * + * OPEN: + * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, + * we need to know how many files are open for reading or writing on a + * given inode. This information too is stored here. + * + * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) + */ + +struct nfs4_lock_state { + struct list_head ls_locks; /* Other lock stateids */ + fl_owner_t ls_owner; /* POSIX lock owner */ +#define NFS_LOCK_INITIALIZED 1 + int ls_flags; + u32 ls_seqid; + u32 ls_id; + nfs4_stateid ls_stateid; + atomic_t ls_count; +}; + +/* bits for nfs4_state->flags */ +enum { + LK_STATE_IN_USE, + NFS_DELEGATED_STATE, +}; + +struct nfs4_state { + struct list_head open_states; /* List of states for the same state_owner */ + struct list_head inode_states; /* List of states for the same inode */ + struct list_head lock_states; /* List of subservient lock stateids */ + + struct nfs4_state_owner *owner; /* Pointer to the open owner */ + struct inode *inode; /* Pointer to the inode */ + + unsigned long flags; /* Do we hold any locks? */ + struct iosem lock_iosem; /* Serializes file locking operations */ + rwlock_t state_lock; /* Protects the lock_states list */ + + nfs4_stateid stateid; + + unsigned int nreaders; + unsigned int nwriters; + int state; /* State on the server (R,W, or RW) */ + atomic_t count; +}; + + +struct nfs4_exception { + long timeout; + int retry; +}; + +struct nfs4_state_recovery_ops { + int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); + int (*recover_lock)(struct nfs4_state *, struct file_lock *); +}; + +extern struct dentry_operations nfs4_dentry_operations; +extern struct inode_operations nfs4_dir_inode_operations; + +/* inode.c */ +extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t); +extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int); +extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); + + +/* nfs4proc.c */ +extern int nfs4_map_errors(int err); +extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); +extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); +extern int nfs4_proc_async_renew(struct nfs4_client *); +extern int nfs4_proc_renew(struct nfs4_client *); +extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); +extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); +extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); +extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, + struct nfs_fs_locations *fs_locations, struct page *page); + +extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; +extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; + +extern const u32 nfs4_fattr_bitmap[2]; +extern const u32 nfs4_statfs_bitmap[2]; +extern const u32 nfs4_pathconf_bitmap[2]; +extern const u32 nfs4_fsinfo_bitmap[2]; + +/* nfs4renewd.c */ +extern void nfs4_schedule_state_renewal(struct nfs4_client *); +extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); +extern void nfs4_kill_renewd(struct nfs4_client *); +extern void nfs4_renew_state(void *); + +/* nfs4state.c */ +extern void init_nfsv4_state(struct nfs_server *); +extern void destroy_nfsv4_state(struct nfs_server *); +extern struct nfs4_client *nfs4_get_client(struct in_addr *); +extern void nfs4_put_client(struct nfs4_client *clp); +extern int nfs4_init_client(struct nfs4_client *clp); +extern struct nfs4_client *nfs4_find_client(struct in_addr *); +extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); + +extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); +extern void nfs4_put_state_owner(struct nfs4_state_owner *); +extern void nfs4_drop_state_owner(struct nfs4_state_owner *); +extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); +extern void nfs4_put_open_state(struct nfs4_state *); +extern void nfs4_close_state(struct nfs4_state *, mode_t); +extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); +extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); +extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t); +extern struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t); +extern void nfs4_put_lock_state(struct nfs4_lock_state *state); +extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); +extern void nfs4_notify_setlk(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_notify_unlck(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); + +extern const nfs4_stateid zero_stateid; + +static void inline nfs_lock_state_owner(struct nfs4_state_owner *sp) +{ + iosem_lock(&sp->so_iosem); +} + +static void inline nfs_unlock_state_owner(struct nfs4_state_owner *sp) +{ + iosem_unlock(&sp->so_iosem); +} + +/* nfs4xdr.c */ +extern uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus); +extern struct rpc_procinfo nfs4_procedures[]; + +struct nfs4_mount_data; + +/* callback_xdr.c */ +extern struct svc_version nfs4_callback_version1; + +#else + +#define init_nfsv4_state(server) do { } while (0) +#define destroy_nfsv4_state(server) do { } while (0) +#define nfs4_put_state_owner(inode, owner) do { } while (0) +#define nfs4_put_open_state(state) do { } while (0) +#define nfs4_close_state(a, b) do { } while (0) + +#endif /* CONFIG_NFS_V4 */ +#endif /* __LINUX_FS_NFS_NFS4_FS.H */ Index: linux-2.6.12-rc3/fs/nfs/nfs4proc.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/nfs4proc.c +++ linux-2.6.12-rc3/fs/nfs/nfs4proc.c @@ -47,7 +47,9 @@ #include #include #include +#include +#include "nfs4_fs.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -62,8 +64,6 @@ static int nfs4_handle_exception(struct extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; -extern nfs4_stateid zero_stateid; - /* Prevent leaks of NFSv4 errors into userland */ int nfs4_map_errors(int err) { @@ -104,7 +104,7 @@ const u32 nfs4_statfs_bitmap[2] = { | FATTR4_WORD1_SPACE_TOTAL }; -u32 nfs4_pathconf_bitmap[2] = { +const u32 nfs4_pathconf_bitmap[2] = { FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME, 0 @@ -124,7 +124,7 @@ static void nfs4_setup_readdir(u64 cooki BUG_ON(readdir->count < 80); if (cookie > 2) { - readdir->cookie = (cookie > 2) ? cookie : 0; + readdir->cookie = cookie; memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier)); return; } @@ -308,7 +308,7 @@ static int _nfs4_open_delegation_recall( }; int status = 0; - down(&sp->so_sema); + nfs_lock_state_owner(sp); if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) goto out; if (state->state == 0) @@ -324,7 +324,7 @@ static int _nfs4_open_delegation_recall( clear_bit(NFS_DELEGATED_STATE, &state->flags); } out: - up(&sp->so_sema); + nfs_unlock_state_owner(sp); dput(parent); return status; } @@ -461,6 +461,7 @@ static int _nfs4_open_expired(struct nfs .f_attr = &f_attr, .server = server, }; + uint32_t generation; int status = 0; if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { @@ -471,23 +472,17 @@ static int _nfs4_open_expired(struct nfs set_bit(NFS_DELEGATED_STATE, &state->flags); goto out; } + /* If we are in a failover situation, recover path first */ + status = nfs_try_migrate_inode(dir, parent); + if (status != 0) + goto out_nodeleg; + generation = server->generation; status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); if (status != 0) goto out_nodeleg; - /* Check if files differ */ - if ((f_attr.mode & S_IFMT) != (inode->i_mode & S_IFMT)) + status = nfs_try_migrate_filehandle(inode, &o_res.fh, o_res.f_attr, generation); + if (status != 0) goto out_stale; - /* Has the file handle changed? */ - if (nfs_compare_fh(&o_res.fh, NFS_FH(inode)) != 0) { - /* Verify if the change attributes are the same */ - if (f_attr.change_attr != NFS_I(inode)->change_attr) - goto out_stale; - if (nfs_size_to_loff_t(f_attr.size) != inode->i_size) - goto out_stale; - /* Lets just pretend that this is the same file */ - nfs_copy_fh(NFS_FH(inode), &o_res.fh); - NFS_I(inode)->fileid = f_attr.fileid; - } memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); if (o_res.delegation_type != 0) { if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) @@ -501,7 +496,6 @@ out: dput(parent); return status; out_stale: - status = -ESTALE; /* Invalidate the state owner so we don't ever use it again */ nfs4_drop_state_owner(sp); d_drop(dentry); @@ -556,7 +550,7 @@ static int _nfs4_open_delegated(struct i dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); goto out_err; } - down(&sp->so_sema); + nfs_lock_state_owner(sp); state = nfs4_get_open_state(inode, sp); if (state == NULL) goto out_err; @@ -581,7 +575,7 @@ static int _nfs4_open_delegated(struct i set_bit(NFS_DELEGATED_STATE, &state->flags); update_open_stateid(state, &delegation->stateid, open_flags); out_ok: - up(&sp->so_sema); + nfs_unlock_state_owner(sp); nfs4_put_state_owner(sp); up_read(&nfsi->rwsem); up_read(&clp->cl_sem); @@ -592,7 +586,7 @@ out_err: if (sp != NULL) { if (state != NULL) nfs4_put_open_state(state); - up(&sp->so_sema); + nfs_unlock_state_owner(sp); nfs4_put_state_owner(sp); } up_read(&nfsi->rwsem); @@ -657,7 +651,7 @@ static int _nfs4_do_open(struct inode *d } else o_arg.u.attrs = sattr; /* Serialization for the sequence id */ - down(&sp->so_sema); + nfs_lock_state_owner(sp); status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); if (status != 0) @@ -673,7 +667,7 @@ static int _nfs4_do_open(struct inode *d update_open_stateid(state, &o_res.stateid, flags); if (o_res.delegation_type != 0) nfs_inode_set_delegation(inode, cred, &o_res); - up(&sp->so_sema); + nfs_unlock_state_owner(sp); nfs4_put_state_owner(sp); up_read(&clp->cl_sem); *res = state; @@ -682,7 +676,7 @@ out_err: if (sp != NULL) { if (state != NULL) nfs4_put_open_state(state); - up(&sp->so_sema); + nfs_unlock_state_owner(sp); nfs4_put_state_owner(sp); } /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */ @@ -748,11 +742,10 @@ static int _nfs4_do_setattr(struct nfs_s fattr->valid = 0; - if (state != NULL) + if (state != NULL) { msg.rpc_cred = state->owner->so_cred; - if (sattr->ia_valid & ATTR_SIZE) nfs4_copy_stateid(&arg.stateid, state, NULL); - else + } else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); return rpc_call_sync(server->client, &msg, 0); @@ -809,7 +802,7 @@ static void nfs4_close_done(struct rpc_t } state->state = calldata->arg.open_flags; nfs4_put_open_state(state); - up(&sp->so_sema); + nfs_unlock_state_owner(sp); nfs4_put_state_owner(sp); up_read(&server->nfs4_state->cl_sem); kfree(calldata); @@ -866,15 +859,46 @@ int nfs4_do_close(struct inode *inode, s * caller that an asynchronous RPC call has been launched, and * that it will release the semaphores on completion. */ - return (status == 0) ? -EINPROGRESS : status; + if (status == 0) + return -EINPROGRESS; + kfree(calldata); + return status; } -struct inode * +static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) +{ + struct file *filp; + int flags; + + flags = nd->intent.open.flags & ~(FMODE_READ|FMODE_WRITE); + switch (nd->intent.open.flags & (FMODE_READ|FMODE_WRITE)) { + case FMODE_READ|FMODE_WRITE: + flags |= O_RDWR; + break; + case FMODE_WRITE: + flags |= O_WRONLY; + break; + case FMODE_READ: + flags |= O_RDONLY; + } + + filp = dentry_open(dget(dentry), mntget(nd->mnt), flags); + if (!IS_ERR(filp)) { + struct nfs_open_context *ctx; + ctx = (struct nfs_open_context *)filp->private_data; + ctx->state = state; + nd->intent.open.file = filp; + } else + nfs4_close_state(state, nd->intent.open.flags); +} + +struct dentry * nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct iattr attr; struct rpc_cred *cred; struct nfs4_state *state; + struct dentry *res; if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; @@ -888,16 +912,23 @@ nfs4_atomic_open(struct inode *dir, stru cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); if (IS_ERR(cred)) - return (struct inode *)cred; + return (struct dentry *)cred; state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); put_rpccred(cred); - if (IS_ERR(state)) - return (struct inode *)state; - return state->inode; + if (IS_ERR(state)) { + if (PTR_ERR(state) == -ENOENT) + d_add(dentry, NULL); + return (struct dentry *)state; + } + res = d_add_unique(dentry, state->inode); + if (res != NULL) + dentry = res; + nfs4_intent_set_file(nd, dentry, state); + return res; } int -nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) +nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) { struct rpc_cred *cred; struct nfs4_state *state; @@ -910,18 +941,18 @@ nfs4_open_revalidate(struct inode *dir, if (IS_ERR(state)) state = nfs4_do_open(dir, dentry, openflags, NULL, cred); put_rpccred(cred); - if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) + if (state == ERR_PTR(-ENOENT) && dentry->d_inode == NULL) return 1; if (IS_ERR(state)) return 0; inode = state->inode; + iput(inode); if (inode == dentry->d_inode) { - iput(inode); + nfs4_intent_set_file(nd, dentry, state); return 1; } d_drop(dentry); nfs4_close_state(state, openflags); - iput(inode); return 0; } @@ -950,7 +981,7 @@ static int _nfs4_server_capabilities(str return status; } -static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) +int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { struct nfs4_exception exception = { }; int err; @@ -1116,47 +1147,31 @@ static int nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct iattr *sattr) { - struct inode * inode = dentry->d_inode; - int size_change = sattr->ia_valid & ATTR_SIZE; - struct nfs4_state *state = NULL; - int need_iput = 0; + struct rpc_cred *cred; + struct inode *inode = dentry->d_inode; + struct nfs4_state *state; int status; fattr->valid = 0; - if (size_change) { - struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - if (IS_ERR(cred)) - return PTR_ERR(cred); + cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + if (IS_ERR(cred)) + return PTR_ERR(cred); + /* Search for an existing WRITE delegation first */ + state = nfs4_open_delegated(inode, FMODE_WRITE, cred); + if (!IS_ERR(state)) { + /* NB: nfs4_open_delegated() bumps the inode->i_count */ + iput(inode); + } else { + /* Search for an existing open(O_WRITE) stateid */ state = nfs4_find_state(inode, cred, FMODE_WRITE); - if (state == NULL) { - state = nfs4_open_delegated(dentry->d_inode, - FMODE_WRITE, cred); - if (IS_ERR(state)) - state = nfs4_do_open(dentry->d_parent->d_inode, - dentry, FMODE_WRITE, - NULL, cred); - need_iput = 1; - } - put_rpccred(cred); - if (IS_ERR(state)) - return PTR_ERR(state); - - if (state->inode != inode) { - printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode); - status = -EIO; - goto out; - } } + status = nfs4_do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, state); -out: - if (state) { - inode = state->inode; + if (state != NULL) nfs4_close_state(state, FMODE_WRITE); - if (need_iput) - iput(inode); - } + put_rpccred(cred); return status; } @@ -1436,7 +1451,7 @@ static int nfs4_proc_commit(struct nfs_w static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs4_state *state; struct rpc_cred *cred; @@ -1458,11 +1473,11 @@ nfs4_proc_create(struct inode *dir, stru struct nfs_fattr fattr; status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, NFS_FH(state->inode), sattr, state); - if (status == 0) - goto out; - } else if (flags != 0) - goto out; - nfs4_close_state(state, flags); + } + if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) + nfs4_intent_set_file(nd, dentry, state); + else + nfs4_close_state(state, flags); out: return status; } @@ -2104,63 +2119,191 @@ nfs4_proc_renew(struct nfs4_client *clp) return 0; } -/* - * We will need to arrange for the VFS layer to provide an atomic open. - * Until then, this open method is prone to inefficiency and race conditions - * due to the lookup, potential create, and open VFS calls from sys_open() - * placed on the wire. +static inline int nfs4_server_supports_acls(struct nfs_server *server) +{ + return (server->caps & NFS_CAP_ACLS) + && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) + && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); +} + +/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that + * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on + * the stack. */ -static int -nfs4_proc_file_open(struct inode *inode, struct file *filp) +#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) + +static void buf_to_pages(const void *buf, size_t buflen, + struct page **pages, unsigned int *pgbase) { - struct dentry *dentry = filp->f_dentry; - struct nfs_open_context *ctx; - struct nfs4_state *state = NULL; - struct rpc_cred *cred; - int status = -ENOMEM; + const void *p = buf; - dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", - (int)dentry->d_parent->d_name.len, - dentry->d_parent->d_name.name, - (int)dentry->d_name.len, dentry->d_name.name); + *pgbase = offset_in_page(buf); + p -= *pgbase; + while (p < buf + buflen) { + *(pages++) = virt_to_page(p); + p += PAGE_CACHE_SIZE; + } +} +struct nfs4_cached_acl { + int cached; + size_t len; + char data[0]; +}; - /* Find our open stateid */ - cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - if (IS_ERR(cred)) - return PTR_ERR(cred); - ctx = alloc_nfs_open_context(dentry, cred); - put_rpccred(cred); - if (unlikely(ctx == NULL)) - return -ENOMEM; - status = -EIO; /* ERACE actually */ - state = nfs4_find_state(inode, cred, filp->f_mode); - if (unlikely(state == NULL)) - goto no_state; - ctx->state = state; - nfs4_close_state(state, filp->f_mode); - ctx->mode = filp->f_mode; - nfs_file_set_open_context(filp, ctx); - put_nfs_open_context(ctx); - if (filp->f_mode & FMODE_WRITE) - nfs_begin_data_update(inode); - return 0; -no_state: - printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); - put_nfs_open_context(ctx); - return status; +static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&inode->i_lock); + kfree(nfsi->nfs4_acl); + nfsi->nfs4_acl = acl; + spin_unlock(&inode->i_lock); } -/* - * Release our state - */ -static int -nfs4_proc_file_release(struct inode *inode, struct file *filp) +static void nfs4_zap_acl_attr(struct inode *inode) { - if (filp->f_mode & FMODE_WRITE) - nfs_end_data_update(inode); - nfs_file_clear_open_context(filp); - return 0; + nfs4_set_cached_acl(inode, NULL); +} + +static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs4_cached_acl *acl; + int ret = -ENOENT; + + spin_lock(&inode->i_lock); + acl = nfsi->nfs4_acl; + if (acl == NULL) + goto out; + if (buf == NULL) /* user is just asking for length */ + goto out_len; + if (acl->cached == 0) + goto out; + ret = -ERANGE; /* see getxattr(2) man page */ + if (acl->len > buflen) + goto out; + memcpy(buf, acl->data, acl->len); +out_len: + ret = acl->len; +out: + spin_unlock(&inode->i_lock); + return ret; +} + +static void nfs4_write_cached_acl(struct inode *inode, const char *buf, size_t acl_len) +{ + struct nfs4_cached_acl *acl; + + if (buf && acl_len <= PAGE_SIZE) { + acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); + if (acl == NULL) + goto out; + acl->cached = 1; + memcpy(acl->data, buf, acl_len); + } else { + acl = kmalloc(sizeof(*acl), GFP_KERNEL); + if (acl == NULL) + goto out; + acl->cached = 0; + } + acl->len = acl_len; +out: + nfs4_set_cached_acl(inode, acl); +} + +static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +{ + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_getaclargs args = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + size_t resp_len = buflen; + void *resp_buf; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], + .rpc_argp = &args, + .rpc_resp = &resp_len, + }; + struct page *localpage = NULL; + int ret; + + if (buflen < PAGE_SIZE) { + /* As long as we're doing a round trip to the server anyway, + * let's be prepared for a page of acl data. */ + localpage = alloc_page(GFP_KERNEL); + resp_buf = page_address(localpage); + if (localpage == NULL) + return -ENOMEM; + args.acl_pages[0] = localpage; + args.acl_pgbase = 0; + args.acl_len = PAGE_SIZE; + } else { + resp_buf = buf; + buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); + } + ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (ret) + goto out_free; + if (resp_len > args.acl_len) + nfs4_write_cached_acl(inode, NULL, resp_len); + else + nfs4_write_cached_acl(inode, resp_buf, resp_len); + if (buf) { + ret = -ERANGE; + if (resp_len > buflen) + goto out_free; + if (localpage) + memcpy(buf, resp_buf, resp_len); + } + ret = resp_len; +out_free: + if (localpage) + __free_page(localpage); + return ret; +} + +static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + ret = nfs_revalidate_inode(server, inode); + if (ret < 0) + return ret; + ret = nfs4_read_cached_acl(inode, buf, buflen); + if (ret != -ENOENT) + return ret; + return nfs4_get_acl_uncached(inode, buf, buflen); +} + +static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_setaclargs arg = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL], + .rpc_argp = &arg, + .rpc_resp = NULL, + }; + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + if (ret == 0) + nfs4_write_cached_acl(inode, buf, buflen); + return ret; } static int @@ -2346,38 +2489,56 @@ nfs4_proc_setclientid_confirm(struct nfs return status; } -static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid) +struct nfs4_delegreturn_data { + struct nfs_server *server; + struct nfs_fh fhandle; + nfs4_stateid stateid; + struct nfs4_delegreturnargs args; +}; + +static void nfs4_delegreturn_done(struct rpc_task *task) { - struct nfs4_delegreturnargs args = { - .fhandle = NFS_FH(inode), - .stateid = stateid, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN], - .rpc_argp = &args, - .rpc_cred = cred, - }; + struct nfs4_delegreturn_data *calldata = (struct nfs4_delegreturn_data *)task->tk_calldata; - return rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + switch (task->tk_status) { + case 0: + break; + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + nfs4_schedule_state_recovery(calldata->server->nfs4_state); + break; + default: + if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) { + rpc_restart_call(task); + return; + } + } + kfree(calldata); } int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid) { - struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_exception exception = { }; - int err; - do { - err = _nfs4_proc_delegreturn(inode, cred, stateid); - switch (err) { - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(server->nfs4_state); - case 0: - return 0; - } - err = nfs4_handle_exception(server, err, &exception); - } while (exception.retry); - return err; + struct nfs4_delegreturn_data *calldata; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN], + .rpc_cred = cred, + }; + int status = -ENOMEM; + + calldata = kmalloc(sizeof(*calldata), GFP_NOFS); + if (calldata == NULL) + goto out; + calldata->server = NFS_SERVER(inode); + nfs_copy_fh(&calldata->fhandle, NFS_FH(inode)); + memcpy(&calldata->stateid, stateid, sizeof(calldata->stateid)); + calldata->args.fhandle = &calldata->fhandle; + calldata->args.stateid = &calldata->stateid; + msg.rpc_argp = &calldata->args; + status = rpc_call_async(NFS_CLIENT(inode), &msg, 0, nfs4_delegreturn_done, calldata); + if (status != 0) + kfree(calldata); +out: + return status; } #define NFS4_LOCK_MINTIMEOUT (1 * HZ) @@ -2447,7 +2608,7 @@ static int _nfs4_proc_getlk(struct nfs4_ down_read(&clp->cl_sem); nlo.clientid = clp->cl_clientid; - down(&state->lock_sema); + iosem_lock(&state->lock_iosem); lsp = nfs4_find_lock_state(state, request->fl_owner); if (lsp) nlo.id = lsp->ls_id; @@ -2478,7 +2639,7 @@ static int _nfs4_proc_getlk(struct nfs4_ } if (lsp) nfs4_put_lock_state(lsp); - up(&state->lock_sema); + iosem_unlock(&state->lock_iosem); up_read(&clp->cl_sem); return status; } @@ -2515,56 +2676,97 @@ static int do_vfs_lock(struct file *file return res; } -static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +struct nfs4_unlockdata { + struct nfs_lockargs arg; + struct nfs_lockres res; + struct nfs_locku_opargs luargs; + struct nfs4_lock_state *lsp; + struct nfs4_state *state; + struct nfs_open_context *ctx; + struct file_lock fl; + struct iosem_work worker; +}; + +static void release_calldata(struct nfs4_unlockdata *calldata) { - struct inode *inode = state->inode; - struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_client *clp = server->nfs4_state; - struct nfs_lockargs arg = { - .fh = NFS_FH(inode), - .type = nfs4_lck_type(cmd, request), - .offset = request->fl_start, - .length = nfs4_lck_length(request), - }; - struct nfs_lockres res = { - .server = server, - }; + if (calldata->lsp != NULL) { + nfs4_notify_unlck(calldata->state, &calldata->fl, + calldata->lsp); + nfs4_put_lock_state(calldata->lsp); + } + iosem_unlock(&calldata->state->lock_iosem); + up_read(&calldata->state->owner->so_client->cl_sem); + put_nfs_open_context(calldata->ctx); + kfree(calldata); +} + +static void nfs4_locku_done(struct rpc_task *task) +{ + struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; + + + nfs4_increment_lock_seqid(task->tk_status, calldata->lsp); + if (task->tk_status == 0) + memcpy(&calldata->lsp->ls_stateid, &calldata->res.u.stateid, + sizeof(calldata->lsp->ls_stateid)); + release_calldata(calldata); +} + +static void nfs4_do_unlock_func(void *data) +{ + struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)data; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], - .rpc_argp = &arg, - .rpc_resp = &res, - .rpc_cred = state->owner->so_cred, + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], + .rpc_argp = &calldata->arg, + .rpc_resp = &calldata->res, + .rpc_cred = calldata->state->owner->so_cred, }; struct nfs4_lock_state *lsp; - struct nfs_locku_opargs luargs; - int status = 0; - - down_read(&clp->cl_sem); - down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); - if (!lsp) - goto out; - /* We might have lost the locks! */ - if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { - luargs.seqid = lsp->ls_seqid; - memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); - arg.u.locku = &luargs; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_lock_seqid(status, lsp); - } - if (status == 0) { - memcpy(&lsp->ls_stateid, &res.u.stateid, - sizeof(lsp->ls_stateid)); - nfs4_notify_unlck(state, request, lsp); - } - nfs4_put_lock_state(lsp); + lsp = nfs4_find_lock_state(calldata->state, calldata->fl.fl_owner); + if (lsp == NULL) + goto out_free; + if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) + goto out_free; + + calldata->lsp = lsp; + calldata->luargs.seqid = lsp->ls_seqid; + memcpy(&calldata->luargs.stateid, &lsp->ls_stateid, sizeof(calldata->luargs.stateid)); + + if (rpc_call_async(NFS_SERVER(calldata->state->inode)->client, + &msg, 0, nfs4_locku_done, calldata) != 0) + goto out_free; + return; +out_free: + release_calldata(calldata); +} + +static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *fl) +{ + struct nfs4_unlockdata *calldata; + + if (!test_bit(LK_STATE_IN_USE, &state->flags)) + goto out; + calldata = (struct nfs4_unlockdata *)kmalloc(sizeof(*calldata), GFP_KERNEL); + if (calldata == NULL) + return -ENOMEM; + calldata->arg.fh = NFS_FH(state->inode); + calldata->arg.type = nfs4_lck_type(cmd, fl); + calldata->arg.offset = fl->fl_start; + calldata->arg.length = nfs4_lck_length(fl); + calldata->res.server = NFS_SERVER(state->inode); + calldata->arg.u.locku = &calldata->luargs; + calldata->lsp = NULL; + calldata->state = state; + calldata->ctx = get_nfs_open_context((struct nfs_open_context*)fl->fl_file->private_data); + memcpy(&calldata->fl, fl, sizeof(calldata->fl)); + iosem_work_init(&calldata->worker, nfs4_do_unlock_func, calldata); + down_read(&state->owner->so_client->cl_sem); + iosem_lock_and_schedule_work(&state->lock_iosem, &calldata->worker); + /* Note: We do the VFS unlock now! */ + do_vfs_lock(fl->fl_file, fl); out: - up(&state->lock_sema); - if (status == 0) - do_vfs_lock(request->fl_file, request); - up_read(&clp->cl_sem); - return status; + return 0; } static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) @@ -2623,13 +2825,13 @@ static int _nfs4_do_setlk(struct nfs4_st largs.u.open_lock = &otl; largs.new_lock_owner = 1; arg.u.lock = &largs; - down(&owner->so_sema); + nfs_lock_state_owner(owner); otl.open_seqid = owner->so_seqid; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); /* increment open_owner seqid on success, and * seqid mutating errors */ nfs4_increment_seqid(status, owner); - up(&owner->so_sema); + nfs_unlock_state_owner(owner); } else { struct nfs_exist_lock el = { .seqid = lsp->ls_seqid, @@ -2670,9 +2872,9 @@ static int _nfs4_proc_setlk(struct nfs4_ int status; down_read(&clp->cl_sem); - down(&state->lock_sema); + iosem_lock(&state->lock_iosem); status = _nfs4_do_setlk(state, cmd, request, 0); - up(&state->lock_sema); + iosem_unlock(&state->lock_iosem); if (status == 0) { /* Note: we always want to sleep here! */ request->fl_flags |= FL_SLEEP; @@ -2708,6 +2910,9 @@ nfs4_proc_lock(struct file *filp, int cm ctx = (struct nfs_open_context *)filp->private_data; state = ctx->state; + if (state == NULL) + return -ENOLCK; + if (request->fl_start < 0 || request->fl_end < 0) return -EINVAL; @@ -2733,6 +2938,79 @@ nfs4_proc_lock(struct file *filp, int cm return status; } + +#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" + +int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, + size_t buflen, int flags) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EINVAL; + + if (!S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; + + return nfs4_proc_set_acl(inode, buf, buflen); +} + +/* The getxattr man page suggests returning -ENODATA for unknown attributes, + * and that's what we'll do for e.g. user attributes that haven't been set. + * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported + * attributes in kernel-managed attribute namespaces. */ +ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf, + size_t buflen) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EOPNOTSUPP; + + return nfs4_proc_get_acl(inode, buf, buflen); +} + +ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) +{ + size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; + + if (buf && buflen < len) + return -ERANGE; + if (buf) + memcpy(buf, XATTR_NAME_NFSV4_ACL, len); + return len; +} + +int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, + struct nfs_fs_locations *fs_locations, struct page *page) +{ + struct nfs_server *server = NFS_SERVER(dir); + u32 bitmask[2] = { + [0] = server->attr_bitmask[0] | FATTR4_WORD0_FS_LOCATIONS, + [1] = server->attr_bitmask[1], + }; + struct nfs4_fs_locations_arg args = { + .dir_fh = NFS_FH(dir), + .name = &dentry->d_name, + .page = page, + .bitmask = bitmask, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], + .rpc_argp = &args, + .rpc_resp = &fs_locations, + }; + int status; + + dprintk("%s: start\n", __FUNCTION__); + fs_locations->fattr.valid = 0; + fs_locations->server = server; + status = rpc_call_sync(server->client, &msg, 0); + dprintk("%s: returned status = %d\n", __FUNCTION__, status); + return status; +} + struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, @@ -2743,10 +3021,20 @@ struct nfs4_state_recovery_ops nfs4_netw .recover_lock = nfs4_lock_expired, }; +static struct inode_operations nfs4_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, +}; + struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, .dir_inode_ops = &nfs4_dir_inode_operations, + .file_inode_ops = &nfs4_file_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, @@ -2774,9 +3062,10 @@ struct nfs_rpc_ops nfs_v4_clientops = { .read_setup = nfs4_proc_read_setup, .write_setup = nfs4_proc_write_setup, .commit_setup = nfs4_proc_commit_setup, - .file_open = nfs4_proc_file_open, - .file_release = nfs4_proc_file_release, + .file_open = nfs_open, + .file_release = nfs_release, .lock = nfs4_proc_lock, + .clear_acl_cache = nfs4_zap_acl_attr, }; /* Index: linux-2.6.12-rc3/fs/nfs/nfs4renewd.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/nfs4renewd.c +++ linux-2.6.12-rc3/fs/nfs/nfs4renewd.c @@ -53,6 +53,7 @@ #include #include #include +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_PROC Index: linux-2.6.12-rc3/fs/nfs/nfs4state.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/nfs4state.c +++ linux-2.6.12-rc3/fs/nfs/nfs4state.c @@ -46,24 +46,18 @@ #include #include +#include "nfs4_fs.h" #include "callback.h" #include "delegation.h" #define OPENOWNER_POOL_SIZE 8 -static DEFINE_SPINLOCK(state_spinlock); - -nfs4_stateid zero_stateid; - -#if 0 -nfs4_stateid one_stateid = - { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -#endif +const nfs4_stateid zero_stateid; +static DEFINE_SPINLOCK(state_spinlock); static LIST_HEAD(nfs4_clientid_list); static void nfs4_recover_state(void *); -extern void nfs4_renew_state(void *); void init_nfsv4_state(struct nfs_server *server) @@ -272,7 +266,7 @@ nfs4_alloc_state_owner(void) sp = kmalloc(sizeof(*sp),GFP_KERNEL); if (!sp) return NULL; - init_MUTEX(&sp->so_sema); + iosem_init(&sp->so_iosem); sp->so_seqid = 0; /* arbitrary */ INIT_LIST_HEAD(&sp->so_states); INIT_LIST_HEAD(&sp->so_delegations); @@ -364,7 +358,7 @@ nfs4_alloc_open_state(void) memset(state->stateid.data, 0, sizeof(state->stateid.data)); atomic_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); - init_MUTEX(&state->lock_sema); + iosem_init(&state->lock_iosem); rwlock_init(&state->state_lock); return state; } @@ -466,7 +460,7 @@ out: /* * Beware! Caller must be holding exactly one - * reference to clp->cl_sem and owner->so_sema! + * reference to clp->cl_sem and the state owner lock! */ void nfs4_put_open_state(struct nfs4_state *state) { @@ -485,20 +479,19 @@ void nfs4_put_open_state(struct nfs4_sta nfs4_put_state_owner(owner); } -/* - * Beware! Caller must be holding no references to clp->cl_sem! - * of owner->so_sema! - */ -void nfs4_close_state(struct nfs4_state *state, mode_t mode) +struct nfs4_close_state_args { + struct iosem_work work; + struct nfs4_state *state; + mode_t mode; +}; + +static inline void __nfs4_close_state(struct nfs4_state *state, mode_t mode) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; struct nfs4_client *clp = owner->so_client; int newstate; - atomic_inc(&owner->so_count); - down_read(&clp->cl_sem); - down(&owner->so_sema); /* Protect against nfs4_find_state() */ spin_lock(&inode->i_lock); if (mode & FMODE_READ) @@ -525,11 +518,42 @@ void nfs4_close_state(struct nfs4_state } out: nfs4_put_open_state(state); - up(&owner->so_sema); + nfs_unlock_state_owner(owner); nfs4_put_state_owner(owner); up_read(&clp->cl_sem); } +static void nfs4_close_state_func(void *data) +{ + struct nfs4_close_state_args *args = (struct nfs4_close_state_args *)data; + + __nfs4_close_state(args->state, args->mode); + kfree(args); +} + +/* + * Beware! Caller must be holding no references to clp->cl_sem! + * or state owner lock! + */ +void nfs4_close_state(struct nfs4_state *state, mode_t mode) +{ + struct nfs4_state_owner *owner = state->owner; + struct nfs4_client *clp = owner->so_client; + struct nfs4_close_state_args *args; + + args = kmalloc(sizeof(*args), GFP_NOFS); + if (args == NULL) { + printk("%s: failed. Out of memory\n", __FUNCTION__); + return; + } + atomic_inc(&owner->so_count); + down_read(&clp->cl_sem); + args->state = state; + args->mode = mode; + iosem_work_init(&args->work, nfs4_close_state_func, args); + iosem_lock_and_schedule_work(&owner->so_iosem, &args->work); +} + /* * Search the state->lock_states for an existing lock_owner * that is compatible with current->files @@ -561,7 +585,7 @@ nfs4_find_lock_state(struct nfs4_state * * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. * - * The caller must be holding state->lock_sema + * The caller must be holding state->lock_iosem */ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { @@ -588,7 +612,7 @@ static struct nfs4_lock_state *nfs4_allo * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. * - * The caller must be holding state->lock_sema and clp->cl_sem + * The caller must be holding state->lock_iosem and clp->cl_sem */ struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) { @@ -621,7 +645,7 @@ nfs4_copy_stateid(nfs4_stateid *dst, str } /* -* Called with state->lock_sema and clp->cl_sem held. +* Called with state->lock_iosem and clp->cl_sem held. */ void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) { @@ -708,7 +732,7 @@ nfs4_put_lock_state(struct nfs4_lock_sta } /* -* Called with sp->so_sema and clp->cl_sem held. +* Called with state owner lock and clp->cl_sem held. * * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or * failed with a seqid incrementing error - Index: linux-2.6.12-rc3/fs/nfs/nfs4xdr.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/nfs4xdr.c +++ linux-2.6.12-rc3/fs/nfs/nfs4xdr.c @@ -51,6 +51,7 @@ #include #include #include +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_XDR @@ -82,12 +83,16 @@ static int nfs_stat_to_errno(int); #define encode_getfh_maxsz (op_encode_hdr_maxsz) #define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \ ((3+NFS4_FHSIZE) >> 2)) -#define encode_getattr_maxsz (op_encode_hdr_maxsz + 3) +#define nfs4_fattr_bitmap_maxsz 3 +#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) -#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz) -#define decode_getattr_maxsz (op_decode_hdr_maxsz + 3 + \ - nfs4_fattr_bitmap_maxsz) +/* This is based on getfattr, which uses the most attributes: */ +#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ + 3 + 3 + 3 + 2 * nfs4_name_maxsz)) +#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_value_maxsz) +#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) #define encode_savefh_maxsz (op_encode_hdr_maxsz) #define decode_savefh_maxsz (op_decode_hdr_maxsz) #define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) @@ -122,11 +127,11 @@ static int nfs_stat_to_errno(int); #define encode_symlink_maxsz (op_encode_hdr_maxsz + \ 1 + nfs4_name_maxsz + \ nfs4_path_maxsz + \ - nfs4_fattr_bitmap_maxsz) + nfs4_fattr_maxsz) #define decode_symlink_maxsz (op_decode_hdr_maxsz + 8) #define encode_create_maxsz (op_encode_hdr_maxsz + \ 2 + nfs4_name_maxsz + \ - nfs4_fattr_bitmap_maxsz) + nfs4_fattr_maxsz) #define decode_create_maxsz (op_decode_hdr_maxsz + 8) #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) @@ -205,7 +210,7 @@ static int nfs_stat_to_errno(int); #define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + 4 + \ - nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ @@ -360,6 +365,29 @@ static int nfs_stat_to_errno(int); encode_delegreturn_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ decode_delegreturn_maxsz) +#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_getacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + nfs4_fattr_bitmap_maxsz + 1) +#define NFS4_enc_setacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 4 + \ + nfs4_fattr_bitmap_maxsz + 1) +#define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) +#define NFS4_enc_fs_locations_sz \ + (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_fs_locations_sz \ + (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + nfs4_fattr_bitmap_maxsz) static struct { unsigned int mode; @@ -459,7 +487,7 @@ static int encode_attrs(struct xdr_strea * In the worst-case, this would be * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) * = 36 bytes, plus any contribution from variable-length fields - * such as owner/group/acl's. + * such as owner/group. */ len = 16; @@ -660,8 +688,6 @@ static int encode_getattr_two(struct xdr static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) { - extern u32 nfs4_fattr_bitmap[]; - return encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], bitmask[1] & nfs4_fattr_bitmap[1]); @@ -669,8 +695,6 @@ static int encode_getfattr(struct xdr_st static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask) { - extern u32 nfs4_fsinfo_bitmap[]; - return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], bitmask[1] & nfs4_fsinfo_bitmap[1]); } @@ -969,7 +993,6 @@ static int encode_putrootfh(struct xdr_s static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) { - extern nfs4_stateid zero_stateid; nfs4_stateid stateid; uint32_t *p; @@ -1089,6 +1112,25 @@ static int encode_renew(struct xdr_strea } static int +encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(4+sizeof(zero_stateid.data)); + WRITE32(OP_SETATTR); + WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); + RESERVE_SPACE(2*4); + WRITE32(1); + WRITE32(FATTR4_WORD0_ACL); + if (arg->acl_len % 4) + return -EINVAL; + RESERVE_SPACE(4); + WRITE32(arg->acl_len); + xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + return 0; +} + +static int encode_savefh(struct xdr_stream *xdr) { uint32_t *p; @@ -1632,6 +1674,34 @@ out: } /* + * Encode a GETACL request + */ +static int +nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p, + struct nfs_getaclargs *args) +{ + struct xdr_stream xdr; + struct rpc_auth *auth = req->rq_task->tk_auth; + struct compound_hdr hdr = { + .nops = 2, + }; + int replen, status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0); + /* set up reply buffer: */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, + args->acl_pages, args->acl_pgbase, args->acl_len); +out: + return status; +} + +/* * Encode a WRITE request */ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args) @@ -1697,7 +1767,6 @@ static int nfs4_xdr_enc_fsinfo(struct rp */ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args) { - extern u32 nfs4_pathconf_bitmap[2]; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -1718,7 +1787,6 @@ static int nfs4_xdr_enc_pathconf(struct */ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args) { - extern u32 nfs4_statfs_bitmap[]; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -1828,6 +1896,38 @@ static int nfs4_xdr_enc_delegreturn(stru } /* + * Encode FS_LOCATIONS request + */ +static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs_locations_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + struct rpc_auth *auth = req->rq_task->tk_auth; + int replen; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) + goto out; + if ((status = encode_lookup(&xdr, args->name)) != 0) + goto out; + if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) + goto out; + /* set up reply + * toplevel_status + taglen + rescount + OP_PUTFH + status + * + OP_LOOKUP + status + OP_GETATTR + status = 7 + */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page, + 0, PAGE_SIZE); +out: + return status; +} + +/* * START OF "GENERIC" DECODE ROUTINES. * These may look a little ugly since they are imported from a "generic" * set of XDR encode/decode routines which are intended to be shared by @@ -1861,7 +1961,7 @@ static int nfs4_xdr_enc_delegreturn(stru } \ } while (0) -static int decode_opaque_inline(struct xdr_stream *xdr, uint32_t *len, char **string) +static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string) { uint32_t *p; @@ -1912,7 +2012,7 @@ static int decode_op_hdr(struct xdr_stre static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp) { uint32_t *p; - uint32_t strlen; + unsigned int strlen; char *str; READ_BUF(12); @@ -2042,7 +2142,7 @@ static int decode_attr_symlink_support(s return 0; } -static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fsid *fsid) +static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) { uint32_t *p; @@ -2161,6 +2261,45 @@ static int decode_attr_files_total(struc return status; } +static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fs_locations *res) +{ + int n; + uint32_t *p; + int status = -EIO; + + if (unlikely(bitmap[0] & (FATTR4_WORD0_FS_LOCATIONS -1U))) + goto out; + status = 0; + if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) + goto out; + status = decode_opaque_inline(xdr, &res->fs_pathlen, &res->fs_path); + if (unlikely(status != 0)) + goto out; + READ_BUF(4); + READ32(n); + if (n <= 0) + goto out_eio; + res->nlocations = 0; + while (res->nlocations < n) { + struct nfs_fs_location *loc = &res->locations[res->nlocations]; + + status = decode_opaque_inline(xdr, &loc->serverlen, &loc->server); + if (unlikely(status != 0)) + goto out_eio; + status = decode_opaque_inline(xdr, &loc->rootpathlen, &loc->rootpath); + if (unlikely(status != 0)) + goto out_eio; + if (res->nlocations < NFS_FS_LOCATIONS_MAXENTRIES) + res->nlocations++; + } +out: + dprintk("%s: fs_locations done, error = %d\n", __FUNCTION__, status); + return status; +out_eio: + status = -EIO; + goto out; +} + static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) { uint32_t *p; @@ -2691,10 +2830,14 @@ static int decode_getfattr(struct xdr_st goto xdr_error; if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0) goto xdr_error; - if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid_u.nfs4)) != 0) + if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid)) != 0) goto xdr_error; if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0) goto xdr_error; + if ((status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, + struct nfs_fs_locations, + fattr))) != 0) + goto xdr_error; if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0) goto xdr_error; fattr->mode |= fmode; @@ -3127,6 +3270,46 @@ static int decode_renew(struct xdr_strea return decode_op_hdr(xdr, OP_RENEW); } +static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, + size_t *acl_len) +{ + uint32_t *savep; + uint32_t attrlen, + bitmap[2] = {0}; + struct kvec *iov = req->rq_rcv_buf.head; + int status; + + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto out; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto out; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto out; + + if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { + int hdrlen, recvd; + + /* We ignore &savep and don't do consistency checks on + * the attr length. Let userspace figure it out.... */ + hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; + recvd = req->rq_rcv_buf.len - hdrlen; + if (attrlen > recvd) { + printk(KERN_WARNING "NFS: server cheating in getattr" + " acl reply: attrlen %u > recvd %u\n", + attrlen, recvd); + return -EINVAL; + } + if (attrlen <= *acl_len) + xdr_read_pages(xdr, attrlen); + *acl_len = attrlen; + } + +out: + return status; +} + static int decode_savefh(struct xdr_stream *xdr) { @@ -3418,6 +3601,71 @@ out: } +/* + * Encode an SETACL request + */ +static int +nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_setacl(&xdr, args); +out: + return status; +} +/* + * Decode SETACL response + */ +static int +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_setattr(&xdr, res); +out: + return status; +} + +/* + * Decode GETACL response + */ +static int +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, size_t *acl_len) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getacl(&xdr, rqstp, acl_len); + +out: + return status; +} /* * Decode CLOSE response @@ -3860,6 +4108,29 @@ static int nfs4_xdr_dec_delegreturn(stru return status; } +/* + * FS_LOCATIONS request + */ +static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs_fs_locations *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status != 0) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_lookup(&xdr)) != 0) + goto out; + xdr_enter_page(&xdr, PAGE_SIZE); + status = decode_getfattr(&xdr, &res->fattr, res->server); +out: + return status; +} + uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) { uint32_t bitmap[2] = {0}; @@ -4019,6 +4290,9 @@ struct rpc_procinfo nfs4_procedures[] = PROC(READDIR, enc_readdir, dec_readdir), PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), + PROC(GETACL, enc_getacl, dec_getacl), + PROC(SETACL, enc_setacl, dec_setacl), + PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), }; struct rpc_version nfs_version4 = { Index: linux-2.6.12-rc3/fs/nfs/nfs_iostat.h =================================================================== --- /dev/null +++ linux-2.6.12-rc3/fs/nfs/nfs_iostat.h @@ -0,0 +1,86 @@ +#ifndef _NFS_IOSTAT +#define _NFS_IOSTAT + +#define NFS_IOSTAT_VERS "1.0" + +enum nfs_stat_bytecounters { + NFSIOS_NORMALREADBYTES = 0, + NFSIOS_NORMALWRITTENBYTES, + NFSIOS_DIRECTREADBYTES, + NFSIOS_DIRECTWRITTENBYTES, + NFSIOS_SERVERREADBYTES, + NFSIOS_SERVERWRITTENBYTES, + NFSIOS_WAITEVENTJIFFIES, + __NFSIOS_BYTESMAX, +}; + +enum nfs_stat_eventcounters { + NFSIOS_WAITEVENT = 0, + NFSIOS_INODEREVALIDATE, + NFSIOS_DENTRYREVALIDATE, + NFSIOS_DATAINVALIDATE, + NFSIOS_ATTRINVALIDATE, + NFSIOS_VFSOPEN, + NFSIOS_VFSLOOKUP, + NFSIOS_VFSACCESS, + NFSIOS_VFSREADPAGE, + NFSIOS_VFSREADPAGES, + NFSIOS_VFSWRITEPAGE, + NFSIOS_VFSWRITEPAGES, + NFSIOS_VFSGETDENTS, + NFSIOS_VFSFLUSH, + NFSIOS_VFSFSYNC, + NFSIOS_VFSLOCK, + NFSIOS_VFSRELEASE, + NFSIOS_SETATTRTRUNC, + NFSIOS_EXTENDWRITE, + NFSIOS_SILLYRENAME, + NFSIOS_SHORTREAD, + NFSIOS_SHORTWRITE, + __NFSIOS_COUNTSMAX, +}; + +#ifdef __KERNEL__ + +#include +#include + +struct nfs_iostats { + unsigned long long bytes[__NFSIOS_BYTESMAX]; + unsigned long events[__NFSIOS_COUNTSMAX]; +} ____cacheline_aligned; + +static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) +{ + struct nfs_iostats *iostats; + int cpu; + + cpu = get_cpu(); + iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); + iostats->events[stat] ++; + put_cpu_no_resched(); +} + +static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) +{ + struct nfs_iostats *iostats; + int cpu; + + cpu = get_cpu(); + iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); + iostats->bytes[stat] += addend; + put_cpu_no_resched(); +} + +static inline struct nfs_iostats *nfs_alloc_iostats(void) +{ + return alloc_percpu(struct nfs_iostats); +} + +static inline void nfs_free_iostats(struct nfs_iostats *stats) +{ + free_percpu(stats); +} + +#endif +#endif Index: linux-2.6.12-rc3/fs/nfs/nfsroot.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/nfsroot.c +++ linux-2.6.12-rc3/fs/nfs/nfsroot.c @@ -124,6 +124,7 @@ enum { Opt_soft, Opt_hard, Opt_intr, Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp, + Opt_acl, Opt_noacl, /* Error token */ Opt_err }; @@ -158,6 +159,8 @@ static match_table_t __initdata tokens = {Opt_udp, "udp"}, {Opt_tcp, "proto=tcp"}, {Opt_tcp, "tcp"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, {Opt_err, NULL} }; @@ -266,6 +269,12 @@ static int __init root_nfs_parse(char *n case Opt_tcp: nfs_data.flags |= NFS_MOUNT_TCP; break; + case Opt_acl: + nfs_data.flags &= ~NFS_MOUNT_NOACL; + break; + case Opt_noacl: + nfs_data.flags |= NFS_MOUNT_NOACL; + break; default : return 0; } Index: linux-2.6.12-rc3/fs/nfs/pagelist.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/pagelist.c +++ linux-2.6.12-rc3/fs/nfs/pagelist.c @@ -17,6 +17,7 @@ #include #include #include +#include "nfs_iostat.h" #include #define NFS_PARANOIA 1 @@ -107,11 +108,38 @@ void nfs_unlock_request(struct nfs_page smp_mb__before_clear_bit(); clear_bit(PG_BUSY, &req->wb_flags); smp_mb__after_clear_bit(); - wake_up_all(&req->wb_context->waitq); + wake_up_bit(&req->wb_flags, PG_BUSY); nfs_release_request(req); } /** + * nfs_set_page_writeback_locked - Lock a request for writeback + * @req: + */ +int nfs_set_page_writeback_locked(struct nfs_page *req) +{ + struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + + if (!nfs_lock_request(req)) + return 0; + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + return 1; +} + +/** + * nfs_clear_page_writeback - Unlock request and wake up sleepers + */ +void nfs_clear_page_writeback(struct nfs_page *req) +{ + struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + + spin_lock(&nfsi->req_lock); + radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + spin_unlock(&nfsi->req_lock); + nfs_unlock_request(req); +} + +/** * nfs_clear_request - Free up all resources allocated to the request * @req: * @@ -150,34 +178,15 @@ nfs_release_request(struct nfs_page *req nfs_page_free(req); } -/** - * nfs_list_add_request - Insert a request into a sorted list - * @req: request - * @head: head of list into which to insert the request. - * - * Note that the wb_list is sorted by page index in order to facilitate - * coalescing of requests. - * We use an insertion sort that is optimized for the case of appended - * writes. - */ -void -nfs_list_add_request(struct nfs_page *req, struct list_head *head) +static int nfs_wait_bit_interruptible(void *word) { - struct list_head *pos; + int ret = 0; -#ifdef NFS_PARANOIA - if (!list_empty(&req->wb_list)) { - printk(KERN_ERR "NFS: Add to list failed!\n"); - BUG(); - } -#endif - list_for_each_prev(pos, head) { - struct nfs_page *p = nfs_list_entry(pos); - if (p->wb_index < req->wb_index) - break; - } - list_add(&req->wb_list, pos); - req->wb_list_head = head; + if (signal_pending(current)) + ret = -ERESTARTSYS; + else + schedule(); + return ret; } /** @@ -190,12 +199,26 @@ nfs_list_add_request(struct nfs_page *re int nfs_wait_on_request(struct nfs_page *req) { - struct inode *inode = req->wb_context->dentry->d_inode; - struct rpc_clnt *clnt = NFS_CLIENT(inode); - - if (!NFS_WBACK_BUSY(req)) - return 0; - return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req)); + struct inode *inode = req->wb_context->dentry->d_inode; + struct rpc_clnt *clnt = NFS_CLIENT(inode); + sigset_t oldmask; + unsigned long start = jiffies; + int ret = 0; + + if (!test_bit(PG_BUSY, &req->wb_flags)) + goto out; + /* + * Note: the call to rpc_clnt_sigmask() suffices to ensure that we + * are not interrupted if intr flag is not set + */ + rpc_clnt_sigmask(clnt, &oldmask); + ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY, + nfs_wait_bit_interruptible, TASK_INTERRUPTIBLE); + nfs_add_stats(inode, NFSIOS_WAITEVENTJIFFIES, (jiffies - start)); + nfs_inc_stats(inode, NFSIOS_WAITEVENT); + rpc_clnt_sigunmask(clnt, &oldmask); +out: + return ret; } /** @@ -243,6 +266,62 @@ nfs_coalesce_requests(struct list_head * return npages; } +#define NFS_SCAN_MAXENTRIES 16 +/** + * nfs_scan_lock_dirty - Scan the radix tree for dirty requests + * @nfsi: NFS inode + * @dst: Destination list + * @idx_start: lower bound of page->index to scan + * @npages: idx_start + npages sets the upper bound to scan. + * + * Moves elements from one of the inode request lists. + * If the number of requests is set to 0, the entire address_space + * starting at index idx_start, is scanned. + * The requests are *not* checked to ensure that they form a contiguous set. + * You must be holding the inode's req_lock when calling this function + */ +int +nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, + unsigned long idx_start, unsigned int npages) +{ + struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; + struct nfs_page *req; + unsigned long idx_end; + int found, i; + int res; + + res = 0; + if (npages == 0) + idx_end = ~0; + else + idx_end = idx_start + npages - 1; + + for (;;) { + found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, + (void **)&pgvec[0], idx_start, NFS_SCAN_MAXENTRIES, + NFS_PAGE_TAG_DIRTY); + if (found <= 0) + break; + for (i = 0; i < found; i++) { + req = pgvec[i]; + if (req->wb_index > idx_end) + goto out; + + idx_start = req->wb_index + 1; + + if (nfs_set_page_writeback_locked(req)) { + radix_tree_tag_clear(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + res++; + } + } + } +out: + return res; +} + /** * nfs_scan_list - Scan a list for matching requests * @head: One of the NFS inode request lists @@ -280,7 +359,7 @@ nfs_scan_list(struct list_head *head, st if (req->wb_index > idx_end) break; - if (!nfs_lock_request(req)) + if (!nfs_set_page_writeback_locked(req)) continue; nfs_list_remove_request(req); nfs_list_add_request(req, dst); Index: linux-2.6.12-rc3/fs/nfs/proc.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/proc.c +++ linux-2.6.12-rc3/fs/nfs/proc.c @@ -214,7 +214,7 @@ static int nfs_proc_write(struct nfs_wri static int nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs_fh fhandle; struct nfs_fattr fattr; @@ -622,6 +622,7 @@ struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, .getroot = nfs_proc_get_root, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, Index: linux-2.6.12-rc3/fs/nfs/read.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/read.c +++ linux-2.6.12-rc3/fs/nfs/read.c @@ -26,6 +26,7 @@ #include #include #include +#include "nfs_iostat.h" #include #include @@ -134,6 +135,8 @@ static int nfs_readpage_sync(struct nfs_ } count -= result; rdata->args.pgbase += result; + nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result); + /* Note: result == 0 should only happen if we're caching * a write that extends the file and punches a hole. */ @@ -173,7 +176,6 @@ static int nfs_readpage_async(struct nfs if (len < PAGE_CACHE_SIZE) memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); - nfs_lock_request(new); nfs_list_add_request(new, &one_request); nfs_pagein_one(&one_request, inode); return 0; @@ -185,7 +187,6 @@ static void nfs_readpage_release(struct nfs_clear_request(req); nfs_release_request(req); - nfs_unlock_request(req); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, @@ -462,8 +463,11 @@ void nfs_readpage_result(struct rpc_task dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", task->tk_pid, status); + nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count); + /* Is this a short read? */ if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) { + nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ if (resp->count != 0) { /* Yes, so retry the read at the end of the data */ @@ -493,6 +497,8 @@ int nfs_readpage(struct file *file, stru dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", page, PAGE_CACHE_SIZE, page->index); + nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); + /* * Try to flush any pending writes to the file.. * @@ -553,7 +559,6 @@ readpage_async_filler(void *data, struct } if (len < PAGE_CACHE_SIZE) memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); - nfs_lock_request(new); nfs_list_add_request(new, desc->head); return 0; } @@ -573,6 +578,7 @@ int nfs_readpages(struct file *filp, str inode->i_sb->s_id, (long long)NFS_FILEID(inode), nr_pages); + nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); if (filp == NULL) { desc.ctx = nfs_find_open_context(inode, FMODE_READ); Index: linux-2.6.12-rc3/fs/nfs/write.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfs/write.c +++ linux-2.6.12-rc3/fs/nfs/write.c @@ -57,6 +57,7 @@ #include #include +#include "nfs_iostat.h" #include #include #include @@ -122,6 +123,7 @@ static void nfs_grow_file(struct page *p end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); if (i_size >= end) return; + nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); i_size_write(inode, end); } @@ -210,6 +212,7 @@ static int nfs_writepage_sync(struct nfs wdata->args.pgbase += result; written += result; count -= result; + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result); } while (count); /* Update file length */ nfs_grow_file(page, offset, written); @@ -220,7 +223,7 @@ static int nfs_writepage_sync(struct nfs ClearPageError(page); io_error: - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); nfs_writedata_free(wdata); return written ? written : result; } @@ -268,6 +271,8 @@ int nfs_writepage(struct page *page, str int priority = wb_priority(wbc); int err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); + /* * Note: We need to ensure that we have a reference to the inode * if we are to do asynchronous writes. If not, waiting @@ -335,6 +340,8 @@ int nfs_writepages(struct address_space struct inode *inode = mapping->host; int err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); + err = generic_writepages(mapping, wbc); if (err) return err; @@ -352,7 +359,7 @@ int nfs_writepages(struct address_space if (err < 0) goto out; } - err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc)); + err = nfs_commit_inode(inode, wb_priority(wbc)); if (err > 0) { wbc->nr_to_write -= err; err = 0; @@ -401,7 +408,7 @@ static void nfs_inode_remove_request(str nfsi->npages--; if (!nfsi->npages) { spin_unlock(&nfsi->req_lock); - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); iput(inode); } else spin_unlock(&nfsi->req_lock); @@ -446,6 +453,8 @@ nfs_mark_request_dirty(struct nfs_page * struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&nfsi->req_lock); + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); nfs_list_add_request(req, &nfsi->dirty); nfsi->ndirty++; spin_unlock(&nfsi->req_lock); @@ -503,13 +512,12 @@ nfs_wait_on_requests(struct inode *inode spin_lock(&nfsi->req_lock); next = idx_start; - while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) { + while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { if (req->wb_index > idx_end) break; next = req->wb_index + 1; - if (!NFS_WBACK_BUSY(req)) - continue; + BUG_ON(!NFS_WBACK_BUSY(req)); atomic_inc(&req->wb_count); spin_unlock(&nfsi->req_lock); @@ -538,12 +546,15 @@ static int nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res; - res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages); - nfsi->ndirty -= res; - sub_page_state(nr_dirty,res); - if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + int res = 0; + + if (nfsi->ndirty != 0) { + res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages); + nfsi->ndirty -= res; + sub_page_state(nr_dirty,res); + if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + } return res; } @@ -562,11 +573,14 @@ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res; - res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); - nfsi->ncommit -= res; - if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + int res = 0; + + if (nfsi->ncommit != 0) { + res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); + nfsi->ncommit -= res; + if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + } return res; } #endif @@ -750,7 +764,7 @@ int nfs_updatepage(struct file *file, st * is entirely in cache, it may be more efficient to avoid * fragmenting write requests. */ - if (PageUptodate(page) && inode->i_flock == NULL) { + if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) { loff_t end_offs = i_size_read(inode) - 1; unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT; @@ -821,7 +835,7 @@ out: #else nfs_inode_remove_request(req); #endif - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } static inline int flush_task_priority(int how) @@ -952,7 +966,7 @@ out_bad: nfs_writedata_free(data); } nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); return -ENOMEM; } @@ -1002,7 +1016,7 @@ static int nfs_flush_one(struct list_hea struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return -ENOMEM; } @@ -1029,7 +1043,7 @@ nfs_flush_list(struct list_head *head, i req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return error; } @@ -1121,7 +1135,7 @@ static void nfs_writeback_done_full(stru nfs_inode_remove_request(req); #endif next: - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } } @@ -1137,6 +1151,8 @@ void nfs_writeback_done(struct rpc_task dprintk("NFS: %4d nfs_writeback_done (status %d)\n", task->tk_pid, task->tk_status); + nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); + #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (resp->verf->committed < argp->stable && task->tk_status >= 0) { /* We tried a write call, but the server did not @@ -1162,6 +1178,8 @@ void nfs_writeback_done(struct rpc_task if (task->tk_status >= 0 && resp->count < argp->count) { static unsigned long complain; + nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); + /* Has the server at least made some progress? */ if (resp->count != 0) { /* Was this an NFSv2 write or an NFSv3 stable write? */ @@ -1210,36 +1228,24 @@ static void nfs_commit_rpcsetup(struct l struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; - struct nfs_page *first, *last; + struct nfs_page *first; struct inode *inode; - loff_t start, end, len; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ list_splice_init(head, &data->pages); first = nfs_list_entry(data->pages.next); - last = nfs_list_entry(data->pages.prev); inode = first->wb_context->dentry->d_inode; - /* - * Determine the offset range of requests in the COMMIT call. - * We rely on the fact that data->pages is an ordered list... - */ - start = req_offset(first); - end = req_offset(last) + last->wb_bytes; - len = end - start; - /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */ - if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1)) - len = 0; - data->inode = inode; data->cred = first->wb_context->cred; data->args.fh = NFS_FH(data->inode); - data->args.offset = start; - data->args.count = len; - data->res.count = len; + /* Note: we always request a commit of the entire inode */ + data->args.offset = 0; + data->args.count = 0; + data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -1278,7 +1284,7 @@ nfs_commit_list(struct list_head *head, req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_commit(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return -ENOMEM; } @@ -1324,7 +1330,7 @@ nfs_commit_done(struct rpc_task *task) dprintk(" mismatch\n"); nfs_mark_request_dirty(req); next: - nfs_unlock_request(req); + nfs_clear_page_writeback(req); res++; } sub_page_state(nr_unstable,res); @@ -1342,16 +1348,23 @@ static int nfs_flush_inode(struct inode spin_lock(&nfsi->req_lock); res = nfs_scan_dirty(inode, &head, idx_start, npages); spin_unlock(&nfsi->req_lock); - if (res) - error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how); + if (res) { + struct nfs_server *server = NFS_SERVER(inode); + + /* For single writes, FLUSH_STABLE is more efficient */ + if (res == nfsi->npages && nfsi->npages <= server->wpages) { + if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize) + how |= FLUSH_STABLE; + } + error = nfs_flush_list(&head, server->wpages, how); + } if (error < 0) return error; return res; } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -int nfs_commit_inode(struct inode *inode, unsigned long idx_start, - unsigned int npages, int how) +int nfs_commit_inode(struct inode *inode, int how) { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); @@ -1359,15 +1372,13 @@ int nfs_commit_inode(struct inode *inode error = 0; spin_lock(&nfsi->req_lock); - res = nfs_scan_commit(inode, &head, idx_start, npages); + res = nfs_scan_commit(inode, &head, 0, 0); + spin_unlock(&nfsi->req_lock); if (res) { - res += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&nfsi->req_lock); error = nfs_commit_list(&head, how); - } else - spin_unlock(&nfsi->req_lock); - if (error < 0) - return error; + if (error < 0) + return error; + } return res; } #endif @@ -1389,7 +1400,7 @@ int nfs_sync_inode(struct inode *inode, error = nfs_flush_inode(inode, idx_start, npages, how); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (error == 0) - error = nfs_commit_inode(inode, idx_start, npages, how); + error = nfs_commit_inode(inode, how); #endif } while (error > 0); return error; Index: linux-2.6.12-rc3/fs/nfs_common/Makefile =================================================================== --- /dev/null +++ linux-2.6.12-rc3/fs/nfs_common/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for Linux filesystem routines that are shared by client and server. +# + +obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o + +nfs_acl-objs := nfsacl.o Index: linux-2.6.12-rc3/fs/nfs_common/nfsacl.c =================================================================== --- /dev/null +++ linux-2.6.12-rc3/fs/nfs_common/nfsacl.c @@ -0,0 +1,257 @@ +/* + * fs/nfs_common/nfsacl.c + * + * Copyright (C) 2002-2003 Andreas Gruenbacher + */ + +/* + * The Solaris nfsacl protocol represents some ACLs slightly differently + * than POSIX 1003.1e draft 17 does (and we do): + * + * - Minimal ACLs always have an ACL_MASK entry, so they have + * four instead of three entries. + * - The ACL_MASK entry in such minimal ACLs always has the same + * permissions as the ACL_GROUP_OBJ entry. (In extended ACLs + * the ACL_MASK and ACL_GROUP_OBJ entries may differ.) + * - The identifier fields of the ACL_USER_OBJ and ACL_GROUP_OBJ + * entries contain the identifiers of the owner and owning group. + * (In POSIX ACLs we always set them to ACL_UNDEFINED_ID). + * - ACL entries in the kernel are kept sorted in ascending order + * of (e_tag, e_id). Solaris ACLs are unsorted. + */ + +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); + +EXPORT_SYMBOL(nfsacl_encode); +EXPORT_SYMBOL(nfsacl_decode); + +struct nfsacl_encode_desc { + struct xdr_array2_desc desc; + unsigned int count; + struct posix_acl *acl; + int typeflag; + uid_t uid; + gid_t gid; +}; + +static int +xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) +{ + struct nfsacl_encode_desc *nfsacl_desc = + (struct nfsacl_encode_desc *) desc; + u32 *p = (u32 *) elem; + + if (nfsacl_desc->count < nfsacl_desc->acl->a_count) { + struct posix_acl_entry *entry = + &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; + + *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag); + switch(entry->e_tag) { + case ACL_USER_OBJ: + *p++ = htonl(nfsacl_desc->uid); + break; + case ACL_GROUP_OBJ: + *p++ = htonl(nfsacl_desc->gid); + break; + case ACL_USER: + case ACL_GROUP: + *p++ = htonl(entry->e_id); + break; + default: /* Solaris depends on that! */ + *p++ = 0; + break; + } + *p++ = htonl(entry->e_perm & S_IRWXO); + } else { + const struct posix_acl_entry *pa, *pe; + int group_obj_perm = ACL_READ|ACL_WRITE|ACL_EXECUTE; + + FOREACH_ACL_ENTRY(pa, nfsacl_desc->acl, pe) { + if (pa->e_tag == ACL_GROUP_OBJ) { + group_obj_perm = pa->e_perm & S_IRWXO; + break; + } + } + /* fake up ACL_MASK entry */ + *p++ = htonl(ACL_MASK | nfsacl_desc->typeflag); + *p++ = htonl(0); + *p++ = htonl(group_obj_perm); + } + + return 0; +} + +unsigned int +nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, + struct posix_acl *acl, int encode_entries, int typeflag) +{ + int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0; + struct nfsacl_encode_desc nfsacl_desc = { + .desc = { + .elem_size = 12, + .array_len = encode_entries ? entries : 0, + .xcode = xdr_nfsace_encode, + }, + .acl = acl, + .typeflag = typeflag, + .uid = inode->i_uid, + .gid = inode->i_gid, + }; + int err; + + if (entries > NFS_ACL_MAX_ENTRIES || + xdr_encode_word(buf, base, entries)) + return -EINVAL; + err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc); + if (!err) + err = 8 + nfsacl_desc.desc.elem_size * + nfsacl_desc.desc.array_len; + return err; +} + +struct nfsacl_decode_desc { + struct xdr_array2_desc desc; + unsigned int count; + struct posix_acl *acl; +}; + +static int +xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem) +{ + struct nfsacl_decode_desc *nfsacl_desc = + (struct nfsacl_decode_desc *) desc; + u32 *p = (u32 *) elem; + struct posix_acl_entry *entry; + + if (!nfsacl_desc->acl) { + if (desc->array_len > NFS_ACL_MAX_ENTRIES) + return -EINVAL; + nfsacl_desc->acl = posix_acl_alloc(desc->array_len, GFP_KERNEL); + if (!nfsacl_desc->acl) + return -ENOMEM; + nfsacl_desc->count = 0; + } + + entry = &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; + entry->e_tag = ntohl(*p++) & ~NFS_ACL_DEFAULT; + entry->e_id = ntohl(*p++); + entry->e_perm = ntohl(*p++); + + switch(entry->e_tag) { + case ACL_USER_OBJ: + case ACL_USER: + case ACL_GROUP_OBJ: + case ACL_GROUP: + case ACL_OTHER: + if (entry->e_perm & ~S_IRWXO) + return -EINVAL; + break; + case ACL_MASK: + /* Solaris sometimes sets additonal bits in the mask */ + entry->e_perm &= S_IRWXO; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +cmp_acl_entry(const void *x, const void *y) +{ + const struct posix_acl_entry *a = x, *b = y; + + if (a->e_tag != b->e_tag) + return a->e_tag - b->e_tag; + else if (a->e_id > b->e_id) + return 1; + else if (a->e_id < b->e_id) + return -1; + else + return 0; +} + +/* + * Convert from a Solaris ACL to a POSIX 1003.1e draft 17 ACL. + */ +static int +posix_acl_from_nfsacl(struct posix_acl *acl) +{ + struct posix_acl_entry *pa, *pe, + *group_obj = NULL, *mask = NULL; + + if (!acl) + return 0; + + sort(acl->a_entries, acl->a_count, sizeof(struct posix_acl_entry), + cmp_acl_entry, NULL); + + /* Clear undefined identifier fields and find the ACL_GROUP_OBJ + and ACL_MASK entries. */ + FOREACH_ACL_ENTRY(pa, acl, pe) { + switch(pa->e_tag) { + case ACL_USER_OBJ: + pa->e_id = ACL_UNDEFINED_ID; + break; + case ACL_GROUP_OBJ: + pa->e_id = ACL_UNDEFINED_ID; + group_obj = pa; + break; + case ACL_MASK: + mask = pa; + /* fall through */ + case ACL_OTHER: + pa->e_id = ACL_UNDEFINED_ID; + break; + } + } + if (acl->a_count == 4 && group_obj && mask && + mask->e_perm == group_obj->e_perm) { + /* remove bogus ACL_MASK entry */ + memmove(mask, mask+1, (3 - (mask - acl->a_entries)) * + sizeof(struct posix_acl_entry)); + acl->a_count = 3; + } + return 0; +} + +unsigned int +nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, + struct posix_acl **pacl) +{ + struct nfsacl_decode_desc nfsacl_desc = { + .desc = { + .elem_size = 12, + .xcode = pacl ? xdr_nfsace_decode : NULL, + }, + }; + u32 entries; + int err; + + if (xdr_decode_word(buf, base, &entries) || + entries > NFS_ACL_MAX_ENTRIES) + return -EINVAL; + err = xdr_decode_array2(buf, base + 4, &nfsacl_desc.desc); + if (err) + return err; + if (pacl) { + if (entries != nfsacl_desc.desc.array_len || + posix_acl_from_nfsacl(nfsacl_desc.acl) != 0) { + posix_acl_release(nfsacl_desc.acl); + return -EINVAL; + } + *pacl = nfsacl_desc.acl; + } + if (aclcnt) + *aclcnt = entries; + return 8 + nfsacl_desc.desc.elem_size * + nfsacl_desc.desc.array_len; +} Index: linux-2.6.12-rc3/fs/nfsd/Makefile =================================================================== --- linux-2.6.12-rc3.orig/fs/nfsd/Makefile +++ linux-2.6.12-rc3/fs/nfsd/Makefile @@ -6,7 +6,9 @@ obj-$(CONFIG_NFSD) += nfsd.o nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o +nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o +nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ nfs4acl.o nfs4callback.o nfsd-objs := $(nfsd-y) Index: linux-2.6.12-rc3/fs/nfsd/nfs2acl.c =================================================================== --- /dev/null +++ linux-2.6.12-rc3/fs/nfsd/nfs2acl.c @@ -0,0 +1,336 @@ +/* + * linux/fs/nfsd/nfsacl.c + * + * Process version 2 NFSACL requests. + * + * Copyright (C) 2002-2003 Andreas Gruenbacher + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFSDDBG_FACILITY NFSDDBG_PROC +#define RETURN_STATUS(st) { resp->status = (st); return (st); } + +/* + * NULL call. + */ +static int +nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +/* + * Get the Access and/or Default ACL of a file. + */ +static int nfsacld_proc_getacl(struct svc_rqst * rqstp, + struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) +{ + svc_fh *fh; + struct posix_acl *acl; + int nfserr = 0; + + dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); + + fh = fh_copy(&resp->fh, &argp->fh); + if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP))) + RETURN_STATUS(nfserr_inval); + + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + RETURN_STATUS(nfserr_inval); + resp->mask = argp->mask; + + if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { + acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + if (acl == NULL) { + /* Solaris returns the inode's minimum ACL. */ + + struct inode *inode = fh->fh_dentry->d_inode; + acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + } + resp->acl_access = acl; + } + if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { + /* Check how Solaris handles requests for the Default ACL + of a non-directory! */ + + acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + resp->acl_default = acl; + } + + /* resp->acl_{access,default} are released in nfssvc_release_getacl. */ + RETURN_STATUS(0); + +fail: + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + RETURN_STATUS(nfserr); +} + +/* + * Set the Access and/or Default ACL of a file. + */ +static int nfsacld_proc_setacl(struct svc_rqst * rqstp, + struct nfsd3_setaclargs *argp, + struct nfsd_attrstat *resp) +{ + svc_fh *fh; + int nfserr = 0; + + dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); + + fh = fh_copy(&resp->fh, &argp->fh); + nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_ACCESS, argp->acl_access) ); + } + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + } + + /* argp->acl_{access,default} may have been allocated in + nfssvc_decode_setaclargs. */ + posix_acl_release(argp->acl_access); + posix_acl_release(argp->acl_default); + return nfserr; +} + +/* + * Check file attributes + */ +static int nfsacld_proc_getattr(struct svc_rqst * rqstp, + struct nfsd_fhandle *argp, struct nfsd_attrstat *resp) +{ + dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); + + fh_copy(&resp->fh, &argp->fh); + return fh_verify(rqstp, &resp->fh, 0, MAY_NOP); +} + +/* + * Check file access + */ +static int nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp, + struct nfsd3_accessres *resp) +{ + int nfserr; + + dprintk("nfsd: ACCESS(2acl) %s 0x%x\n", + SVCFH_fmt(&argp->fh), + argp->access); + + fh_copy(&resp->fh, &argp->fh); + resp->access = argp->access; + nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); + return nfserr; +} + +/* + * XDR decode functions + */ +static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclargs *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->mask = ntohl(*p); p++; + + return xdr_argsize_check(rqstp, p); +} + + +static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_setaclargs *argp) +{ + struct kvec *head = rqstp->rq_arg.head; + unsigned int base; + int n; + + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->mask = ntohl(*p++); + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || + !xdr_argsize_check(rqstp, p)) + return 0; + + base = (char *)p - (char *)head->iov_base; + n = nfsacl_decode(&rqstp->rq_arg, base, NULL, + (argp->mask & NFS_ACL) ? + &argp->acl_access : NULL); + if (n > 0) + n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL, + (argp->mask & NFS_DFACL) ? + &argp->acl_default : NULL); + return (n > 0); +} + +static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_fhandle *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + return xdr_argsize_check(rqstp, p); +} + +static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_accessargs *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->access = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +/* + * XDR encode functions + */ + +/* GETACL */ +static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + struct dentry *dentry = resp->fh.fh_dentry; + struct inode *inode = dentry->d_inode; + int w = nfsacl_size( + (resp->mask & NFS_ACL) ? resp->acl_access : NULL, + (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); + struct kvec *head = rqstp->rq_res.head; + unsigned int base; + int n; + + if (dentry == NULL || dentry->d_inode == NULL) + return 0; + inode = dentry->d_inode; + + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + *p++ = htonl(resp->mask); + if (!xdr_ressize_check(rqstp, p)) + return 0; + base = (char *)p - (char *)head->iov_base; + + rqstp->rq_res.page_len = w; + while (w > 0) { + if (!svc_take_res_page(rqstp)) + return 0; + w -= PAGE_SIZE; + } + + n = nfsacl_encode(&rqstp->rq_res, base, inode, + resp->acl_access, + resp->mask & NFS_ACL, 0); + if (n > 0) + n = nfsacl_encode(&rqstp->rq_res, base + n, inode, + resp->acl_default, + resp->mask & NFS_DFACL, + NFS_ACL_DEFAULT); + if (n <= 0) + return 0; + return 1; +} + +static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, u32 *p, + struct nfsd_attrstat *resp) +{ + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + return xdr_ressize_check(rqstp, p); +} + +/* ACCESS */ +static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_accessres *resp) +{ + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + *p++ = htonl(resp->access); + return xdr_ressize_check(rqstp, p); +} + +/* + * XDR release functions + */ +static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + fh_put(&resp->fh); + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + return 1; +} + +static int nfsaclsvc_release_fhandle(struct svc_rqst *rqstp, u32 *p, + struct nfsd_fhandle *resp) +{ + fh_put(&resp->fh); + return 1; +} + +#define nfsaclsvc_decode_voidargs NULL +#define nfsaclsvc_encode_voidres NULL +#define nfsaclsvc_release_void NULL +#define nfsd3_fhandleargs nfsd_fhandle +#define nfsd3_attrstatres nfsd_attrstat +#define nfsd3_voidres nfsd3_voidargs +struct nfsd3_voidargs { int dummy; }; + +#define PROC(name, argt, rest, relt, cache, respsize) \ + { (svc_procfunc) nfsacld_proc_##name, \ + (kxdrproc_t) nfsaclsvc_decode_##argt##args, \ + (kxdrproc_t) nfsaclsvc_encode_##rest##res, \ + (kxdrproc_t) nfsaclsvc_release_##relt, \ + sizeof(struct nfsd3_##argt##args), \ + sizeof(struct nfsd3_##rest##res), \ + 0, \ + cache, \ + respsize, \ + } + +#define ST 1 /* status*/ +#define AT 21 /* attributes */ +#define pAT (1+AT) /* post attributes - conditional */ +#define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */ + +static struct svc_procedure nfsd_acl_procedures2[] = { + PROC(null, void, void, void, RC_NOCACHE, ST), + PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)), + PROC(setacl, setacl, attrstat, fhandle, RC_NOCACHE, ST+AT), + PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT), + PROC(access, access, access, fhandle, RC_NOCACHE, ST+AT+1), +}; + +struct svc_version nfsd_acl_version2 = { + .vs_vers = 2, + .vs_nproc = 5, + .vs_proc = nfsd_acl_procedures2, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS3_SVC_XDRSIZE, +}; Index: linux-2.6.12-rc3/fs/nfsd/nfs3acl.c =================================================================== --- /dev/null +++ linux-2.6.12-rc3/fs/nfsd/nfs3acl.c @@ -0,0 +1,267 @@ +/* + * linux/fs/nfsd/nfs3acl.c + * + * Process version 3 NFSACL requests. + * + * Copyright (C) 2002-2003 Andreas Gruenbacher + */ + +#include +#include +#include +#include +#include +#include +#include + +#define RETURN_STATUS(st) { resp->status = (st); return (st); } + +/* + * NULL call. + */ +static int +nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +/* + * Get the Access and/or Default ACL of a file. + */ +static int nfsd3_proc_getacl(struct svc_rqst * rqstp, + struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) +{ + svc_fh *fh; + struct posix_acl *acl; + int nfserr = 0; + + fh = fh_copy(&resp->fh, &argp->fh); + if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP))) + RETURN_STATUS(nfserr_inval); + + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + RETURN_STATUS(nfserr_inval); + resp->mask = argp->mask; + + if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { + acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + if (acl == NULL) { + /* Solaris returns the inode's minimum ACL. */ + + struct inode *inode = fh->fh_dentry->d_inode; + acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + } + resp->acl_access = acl; + } + if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { + /* Check how Solaris handles requests for the Default ACL + of a non-directory! */ + + acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + resp->acl_default = acl; + } + + /* resp->acl_{access,default} are released in nfs3svc_release_getacl. */ + RETURN_STATUS(0); + +fail: + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + RETURN_STATUS(nfserr); +} + +/* + * Set the Access and/or Default ACL of a file. + */ +static int nfsd3_proc_setacl(struct svc_rqst * rqstp, + struct nfsd3_setaclargs *argp, + struct nfsd3_attrstat *resp) +{ + svc_fh *fh; + int nfserr = 0; + + fh = fh_copy(&resp->fh, &argp->fh); + nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_ACCESS, argp->acl_access) ); + } + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + } + + /* argp->acl_{access,default} may have been allocated in + nfs3svc_decode_setaclargs. */ + posix_acl_release(argp->acl_access); + posix_acl_release(argp->acl_default); + RETURN_STATUS(nfserr); +} + +/* + * XDR decode functions + */ +static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclargs *args) +{ + if (!(p = nfs3svc_decode_fh(p, &args->fh))) + return 0; + args->mask = ntohl(*p); p++; + + return xdr_argsize_check(rqstp, p); +} + + +static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_setaclargs *args) +{ + struct kvec *head = rqstp->rq_arg.head; + unsigned int base; + int n; + + if (!(p = nfs3svc_decode_fh(p, &args->fh))) + return 0; + args->mask = ntohl(*p++); + if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || + !xdr_argsize_check(rqstp, p)) + return 0; + + base = (char *)p - (char *)head->iov_base; + n = nfsacl_decode(&rqstp->rq_arg, base, NULL, + (args->mask & NFS_ACL) ? + &args->acl_access : NULL); + if (n > 0) + n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL, + (args->mask & NFS_DFACL) ? + &args->acl_default : NULL); + return (n > 0); +} + +/* + * XDR encode functions + */ + +/* GETACL */ +static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + struct dentry *dentry = resp->fh.fh_dentry; + + p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); + if (resp->status == 0 && dentry && dentry->d_inode) { + struct inode *inode = dentry->d_inode; + int w = nfsacl_size( + (resp->mask & NFS_ACL) ? resp->acl_access : NULL, + (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); + struct kvec *head = rqstp->rq_res.head; + unsigned int base; + int n; + + *p++ = htonl(resp->mask); + if (!xdr_ressize_check(rqstp, p)) + return 0; + base = (char *)p - (char *)head->iov_base; + + rqstp->rq_res.page_len = w; + while (w > 0) { + if (!svc_take_res_page(rqstp)) + return 0; + w -= PAGE_SIZE; + } + + n = nfsacl_encode(&rqstp->rq_res, base, inode, + resp->acl_access, + resp->mask & NFS_ACL, 0); + if (n > 0) + n = nfsacl_encode(&rqstp->rq_res, base + n, inode, + resp->acl_default, + resp->mask & NFS_DFACL, + NFS_ACL_DEFAULT); + if (n <= 0) + return 0; + } else + if (!xdr_ressize_check(rqstp, p)) + return 0; + + return 1; +} + +/* SETACL */ +static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_attrstat *resp) +{ + p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); + + return xdr_ressize_check(rqstp, p); +} + +/* + * XDR release functions + */ +static int nfs3svc_release_getacl(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + fh_put(&resp->fh); + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + return 1; +} + +#define nfs3svc_decode_voidargs NULL +#define nfs3svc_release_void NULL +#define nfsd3_setaclres nfsd3_attrstat +#define nfsd3_voidres nfsd3_voidargs +struct nfsd3_voidargs { int dummy; }; + +#define PROC(name, argt, rest, relt, cache, respsize) \ + { (svc_procfunc) nfsd3_proc_##name, \ + (kxdrproc_t) nfs3svc_decode_##argt##args, \ + (kxdrproc_t) nfs3svc_encode_##rest##res, \ + (kxdrproc_t) nfs3svc_release_##relt, \ + sizeof(struct nfsd3_##argt##args), \ + sizeof(struct nfsd3_##rest##res), \ + 0, \ + cache, \ + respsize, \ + } + +#define ST 1 /* status*/ +#define AT 21 /* attributes */ +#define pAT (1+AT) /* post attributes - conditional */ +#define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */ + +static struct svc_procedure nfsd_acl_procedures3[] = { + PROC(null, void, void, void, RC_NOCACHE, ST), + PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)), + PROC(setacl, setacl, setacl, fhandle, RC_NOCACHE, ST+pAT), +}; + +struct svc_version nfsd_acl_version3 = { + .vs_vers = 3, + .vs_nproc = 3, + .vs_proc = nfsd_acl_procedures3, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS3_SVC_XDRSIZE, +}; + Index: linux-2.6.12-rc3/fs/nfsd/nfs3xdr.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfsd/nfs3xdr.c +++ linux-2.6.12-rc3/fs/nfsd/nfs3xdr.c @@ -71,6 +71,12 @@ decode_fh(u32 *p, struct svc_fh *fhp) return p + XDR_QUADLEN(size); } +/* Helper function for NFSv3 ACL code */ +u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp) +{ + return decode_fh(p, fhp); +} + static inline u32 * encode_fh(u32 *p, struct svc_fh *fhp) { @@ -233,6 +239,13 @@ encode_post_op_attr(struct svc_rqst *rqs return p; } +/* Helper for NFSv3 ACLs */ +u32 * +nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + return encode_post_op_attr(rqstp, p, fhp); +} + /* * Enocde weak cache consistency data */ Index: linux-2.6.12-rc3/fs/nfsd/nfs4callback.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfsd/nfs4callback.c +++ linux-2.6.12-rc3/fs/nfsd/nfs4callback.c @@ -430,7 +430,7 @@ nfsd4_probe_callback(struct nfs4_client clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX); if (IS_ERR(clnt)) { dprintk("NFSD: couldn't create callback client\n"); - goto out_xprt; + goto out_err; } clnt->cl_intr = 0; clnt->cl_softrtry = 1; @@ -465,8 +465,6 @@ out_rpciod: out_clnt: rpc_shutdown_client(clnt); goto out_err; -out_xprt: - xprt_destroy(xprt); out_err: dprintk("NFSD: warning: no callback path to client %.*s\n", (int)clp->cl_name.len, clp->cl_name.data); Index: linux-2.6.12-rc3/fs/nfsd/nfsproc.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfsd/nfsproc.c +++ linux-2.6.12-rc3/fs/nfsd/nfsproc.c @@ -591,6 +591,7 @@ nfserrno (int errno) { nfserr_dropit, -ENOMEM }, { nfserr_badname, -ESRCH }, { nfserr_io, -ETXTBSY }, + { nfserr_notsupp, -EOPNOTSUPP }, { -1, -EIO } }; int i; Index: linux-2.6.12-rc3/fs/nfsd/nfssvc.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfsd/nfssvc.c +++ linux-2.6.12-rc3/fs/nfsd/nfssvc.c @@ -31,6 +31,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_SVC @@ -364,6 +365,31 @@ nfsd_dispatch(struct svc_rqst *rqstp, u3 return 1; } +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +static struct svc_stat nfsd_acl_svcstats; +static struct svc_version * nfsd_acl_version[] = { + [2] = &nfsd_acl_version2, + [3] = &nfsd_acl_version3, +}; + +#define NFSD_ACL_NRVERS (sizeof(nfsd_acl_version)/sizeof(nfsd_acl_version[0])) +static struct svc_program nfsd_acl_program = { + .pg_prog = NFS_ACL_PROGRAM, + .pg_nvers = NFSD_ACL_NRVERS, + .pg_vers = nfsd_acl_version, + .pg_name = "nfsd", + .pg_stats = &nfsd_acl_svcstats, +}; + +static struct svc_stat nfsd_acl_svcstats = { + .program = &nfsd_acl_program, +}; + +#define nfsd_acl_program_p &nfsd_acl_program +#else +#define nfsd_acl_program_p NULL +#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ + extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; static struct svc_version * nfsd_version[] = { @@ -378,6 +404,7 @@ static struct svc_version * nfsd_version #define NFSD_NRVERS (sizeof(nfsd_version)/sizeof(nfsd_version[0])) struct svc_program nfsd_program = { + .pg_next = nfsd_acl_program_p, .pg_prog = NFS_PROGRAM, /* program number */ .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */ .pg_vers = nfsd_version, /* version table */ Index: linux-2.6.12-rc3/fs/nfsd/nfsxdr.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfsd/nfsxdr.c +++ linux-2.6.12-rc3/fs/nfsd/nfsxdr.c @@ -49,6 +49,12 @@ decode_fh(u32 *p, struct svc_fh *fhp) return p + (NFS_FHSIZE >> 2); } +/* Helper function for NFSv2 ACL code */ +u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp) +{ + return decode_fh(p, fhp); +} + static inline u32 * encode_fh(u32 *p, struct svc_fh *fhp) { @@ -190,6 +196,11 @@ encode_fattr(struct svc_rqst *rqstp, u32 return p; } +/* Helper function for NFSv2 ACL code */ +u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + return encode_fattr(rqstp, p, fhp); +} /* * XDR decode functions Index: linux-2.6.12-rc3/fs/nfsd/vfs.c =================================================================== --- linux-2.6.12-rc3.orig/fs/nfsd/vfs.c +++ linux-2.6.12-rc3/fs/nfsd/vfs.c @@ -46,8 +46,9 @@ #include #include #include -#ifdef CONFIG_NFSD_V4 +#include #include +#ifdef CONFIG_NFSD_V4 #include #include #include @@ -1857,3 +1858,107 @@ nfsd_racache_init(int cache_size) nfsdstats.ra_size = cache_size; return 0; } + +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +struct posix_acl * +nfsd_get_posix_acl(struct svc_fh *fhp, int type) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + char *name; + void *value = NULL; + ssize_t size; + struct posix_acl *acl; + + if (!IS_POSIXACL(inode) || !inode->i_op || !inode->i_op->getxattr) + return ERR_PTR(-EOPNOTSUPP); + switch(type) { + case ACL_TYPE_ACCESS: + name = XATTR_NAME_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name = XATTR_NAME_ACL_DEFAULT; + break; + default: + return ERR_PTR(-EOPNOTSUPP); + } + + size = inode->i_op->getxattr(fhp->fh_dentry, name, NULL, 0); + + if (size < 0) { + acl = ERR_PTR(size); + goto getout; + } else if (size > 0) { + value = kmalloc(size, GFP_KERNEL); + if (!value) { + acl = ERR_PTR(-ENOMEM); + goto getout; + } + size = inode->i_op->getxattr(fhp->fh_dentry, name, value, size); + if (size < 0) { + acl = ERR_PTR(size); + goto getout; + } + } + acl = posix_acl_from_xattr(value, size); + +getout: + kfree(value); + return acl; +} + +int +nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + char *name; + void *value = NULL; + size_t size; + int error; + + if (!IS_POSIXACL(inode) || !inode->i_op || + !inode->i_op->setxattr || !inode->i_op->removexattr) + return -EOPNOTSUPP; + switch(type) { + case ACL_TYPE_ACCESS: + name = XATTR_NAME_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name = XATTR_NAME_ACL_DEFAULT; + break; + default: + return -EOPNOTSUPP; + } + + if (acl && acl->a_count) { + size = xattr_acl_size(acl->a_count); + value = kmalloc(size, GFP_KERNEL); + if (!value) + return -ENOMEM; + size = posix_acl_to_xattr(acl, value, size); + if (size < 0) { + error = size; + goto getout; + } + } else + size = 0; + + if (!fhp->fh_locked) + fh_lock(fhp); /* unlocking is done automatically */ + if (size) + error = inode->i_op->setxattr(fhp->fh_dentry, name, + value, size, 0); + else { + if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) + error = 0; + else { + error = inode->i_op->removexattr(fhp->fh_dentry, name); + if (error == -ENODATA) + error = 0; + } + } + +getout: + kfree(value); + return error; +} +#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ Index: linux-2.6.12-rc3/fs/open.c =================================================================== --- linux-2.6.12-rc3.orig/fs/open.c +++ linux-2.6.12-rc3/fs/open.c @@ -763,13 +763,25 @@ struct file *filp_open(const char * file error = open_namei(filename, namei_flags, mode, &nd); if (!error) - return dentry_open(nd.dentry, nd.mnt, flags); + return nd_open_file(&nd, flags); return ERR_PTR(error); } EXPORT_SYMBOL(filp_open); +struct file *nd_open_file(struct nameidata *nd, int flags) +{ + struct file *filp; + + if ((nd->flags & LOOKUP_OPEN) && nd->intent.open.file != NULL) { + filp = nd->intent.open.file; + path_release(nd); + } else + filp = dentry_open(nd->dentry, nd->mnt, flags); + return filp; +} + struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) { struct file * f; Index: linux-2.6.12-rc3/fs/proc/base.c =================================================================== --- linux-2.6.12-rc3.orig/fs/proc/base.c +++ linux-2.6.12-rc3/fs/proc/base.c @@ -66,6 +66,7 @@ enum pid_directory_inos { PROC_TGID_STATM, PROC_TGID_MAPS, PROC_TGID_MOUNTS, + PROC_TGID_MOUNTSTATS, PROC_TGID_WCHAN, #ifdef CONFIG_SCHEDSTATS PROC_TGID_SCHEDSTAT, @@ -103,6 +104,7 @@ enum pid_directory_inos { PROC_TID_STATM, PROC_TID_MAPS, PROC_TID_MOUNTS, + PROC_TID_MOUNTSTATS, PROC_TID_WCHAN, #ifdef CONFIG_SCHEDSTATS PROC_TID_SCHEDSTAT, @@ -152,6 +154,7 @@ static struct pid_entry tgid_base_stuff[ E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), + E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUGO), #ifdef CONFIG_SECURITY E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif @@ -188,6 +191,7 @@ static struct pid_entry tid_base_stuff[] E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), + E(PROC_TID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUGO), #ifdef CONFIG_SECURITY E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif @@ -555,6 +559,38 @@ static struct file_operations proc_mount .release = mounts_release, }; +extern struct seq_operations mountstats_op; +static int mountstats_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = proc_task(inode); + int ret = seq_open(file, &mountstats_op); + + if (!ret) { + struct seq_file *m = file->private_data; + struct namespace *namespace; + task_lock(task); + namespace = task->namespace; + if (namespace) + get_namespace(namespace); + task_unlock(task); + + if (namespace) + m->private = namespace; + else { + seq_release(inode, file); + ret = -EINVAL; + } + } + return ret; +} + +static struct file_operations proc_mountstats_operations = { + .open = mountstats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mounts_release, +}; + #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ static ssize_t proc_info_read(struct file * file, char __user * buf, @@ -1535,6 +1571,10 @@ static struct dentry *proc_pident_lookup case PROC_TGID_MOUNTS: inode->i_fop = &proc_mounts_operations; break; + case PROC_TID_MOUNTSTATS: + case PROC_TGID_MOUNTSTATS: + inode->i_fop = &proc_mountstats_operations; + break; #ifdef CONFIG_SECURITY case PROC_TID_ATTR: inode->i_nlink = 2; Index: linux-2.6.12-rc3/fs/super.c =================================================================== --- linux-2.6.12-rc3.orig/fs/super.c +++ linux-2.6.12-rc3/fs/super.c @@ -794,17 +794,13 @@ struct super_block *get_sb_single(struct EXPORT_SYMBOL(get_sb_single); struct vfsmount * -do_kern_mount(const char *fstype, int flags, const char *name, void *data) +vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { - struct file_system_type *type = get_fs_type(fstype); struct super_block *sb = ERR_PTR(-ENOMEM); struct vfsmount *mnt; int error; char *secdata = NULL; - if (!type) - return ERR_PTR(-ENODEV); - mnt = alloc_vfsmnt(name); if (!mnt) goto out; @@ -835,7 +831,6 @@ do_kern_mount(const char *fstype, int fl mnt->mnt_parent = mnt; mnt->mnt_namespace = current->namespace; up_write(&sb->s_umount); - put_filesystem(type); return mnt; out_sb: up_write(&sb->s_umount); @@ -846,10 +841,23 @@ out_free_secdata: out_mnt: free_vfsmnt(mnt); out: - put_filesystem(type); return (struct vfsmount *)sb; } +EXPORT_SYMBOL_GPL(vfs_kern_mount); + +struct vfsmount * +do_kern_mount(const char *fstype, int flags, const char *name, void *data) +{ + struct file_system_type *type = get_fs_type(fstype); + struct vfsmount *mnt; + if (!type) + return ERR_PTR(-ENODEV); + mnt = vfs_kern_mount(type, flags, name, data); + put_filesystem(type); + return mnt; +} + EXPORT_SYMBOL_GPL(do_kern_mount); struct vfsmount *kern_mount(struct file_system_type *type) Index: linux-2.6.12-rc3/include/linux/fs.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/fs.h +++ linux-2.6.12-rc3/include/linux/fs.h @@ -1010,6 +1010,7 @@ struct super_operations { void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct vfsmount *); + int (*show_stats)(struct seq_file *, struct vfsmount *); ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); Index: linux-2.6.12-rc3/include/linux/iosem.h =================================================================== --- /dev/null +++ linux-2.6.12-rc3/include/linux/iosem.h @@ -0,0 +1,110 @@ +/* + * include/linux/iosem.h + * + * Copyright (C) 2005 Trond Myklebust + * + * Definitions for iosems. These can act as mutexes, but unlike + * semaphores, their code is 100% arch-independent, and can therefore + * easily be expanded in order to provide for things like + * asynchronous I/O. + */ + +#ifndef __LINUX_SEM_LOCK_H +#define __LINUX_SEM_LOCK_H + +#ifdef __KERNEL__ +#include +#include + +/* + * struct iosem: iosem mutex + * state: bitmask - currently only signals whether or not an exclusive + * lock has been taken + * wait: FIFO wait queue + */ +struct iosem { +#define IOSEM_LOCK_EXCLUSIVE (31) +/* #define IOSEM_LOCK_SHARED (30) */ + unsigned long state; + wait_queue_head_t wait; +}; + + + +/* + * struct iosem_wait: acts as a request for a lock on the iosem + * lock: backpointer to the iosem + * wait: wait queue entry. note that the callback function + * defines what to do when the lock has been granted + */ +struct iosem_wait { + struct iosem *lock; + wait_queue_t wait; +}; + +/* + * struct iosem_work: used by asynchronous waiters. + * + * work: work to schedule once the iosem has been granted. The + * function containing the critical code that needs to + * run under the protection of the lock should be placed here. + * The same function is responsible for calling iosem_unlock() + * when done. + * waiter: iosem waitqueue entry + */ +struct iosem_work { + struct work_struct work; + struct iosem_wait waiter; +}; + +/* + * Functions for synchronous i/o + */ + +/* Synchronously grab an iosem. + * These functions act in pretty much the same way down()/up() + * do for semaphores. + */ +extern void FASTCALL(iosem_lock(struct iosem *lk)); +extern void FASTCALL(iosem_unlock(struct iosem *lk)); + +/* + * Callback function to wake up the sleeping task once + * it has been granted an exclusive lock + */ +extern int iosem_lock_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); + +/* Initialize a struct iosem in the "unlocked" state */ +static inline void iosem_init(struct iosem *lk) +{ + lk->state = 0; + init_waitqueue_head(&lk->wait); +} + +/* Initializes a lock request */ +static inline void iosem_waiter_init(struct iosem_wait *waiter) +{ + waiter->lock = NULL; + init_waitqueue_entry(&waiter->wait, current); + INIT_LIST_HEAD(&waiter->wait.task_list); +} + +/* + * Functions for asynchronous I/O. + */ + +/* Requests an exclusive lock on the iosem on behalf of a workqueue entry "wk". + * Schedule wk->work for execution as soon as the lock is granted. */ +extern int FASTCALL(iosem_lock_and_schedule_work(struct iosem *lk, struct iosem_work *wk)); + +/* Waitqueue notifier that schedules work once the exclusive lock has + * been granted */ +extern int iosem_lock_and_schedule_function(wait_queue_t *wait, unsigned mode, int sync, void *key); + +static inline void iosem_work_init(struct iosem_work *wk, void (*func)(void *), void *data) +{ + INIT_WORK(&wk->work, func, data); +} + +#endif /* __KERNEL__ */ +#endif /* __LINUX_SEM_LOCK_H */ Index: linux-2.6.12-rc3/include/linux/mount.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/mount.h +++ linux-2.6.12-rc3/include/linux/mount.h @@ -68,6 +68,11 @@ extern struct vfsmount *alloc_vfsmnt(con extern struct vfsmount *do_kern_mount(const char *fstype, int flags, const char *name, void *data); +struct file_system_type; +extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, + int flags, const char *name, + void *data); + struct nameidata; extern int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, Index: linux-2.6.12-rc3/include/linux/namei.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/namei.h +++ linux-2.6.12-rc3/include/linux/namei.h @@ -8,6 +8,7 @@ struct vfsmount; struct open_intent { int flags; int create_mode; + struct file *file; }; enum { MAX_NESTED_LINKS = 5 }; @@ -64,6 +65,15 @@ extern int FASTCALL(path_walk(const char extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); extern void path_release(struct nameidata *); extern void path_release_on_umount(struct nameidata *); +extern void path_release_open_intent(struct nameidata *); + +extern struct file *nd_open_file(struct nameidata *nd, int flags); +static inline void nd_init_open_intent(struct nameidata *nd, int flags, int mode) +{ + nd->intent.open.flags = flags; + nd->intent.open.create_mode = mode; + nd->intent.open.file = NULL; +} extern struct dentry * lookup_one_len(const char *, struct dentry *, int); extern struct dentry * lookup_hash(struct qstr *, struct dentry *); Index: linux-2.6.12-rc3/include/linux/nfs4.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/nfs4.h +++ linux-2.6.12-rc3/include/linux/nfs4.h @@ -382,6 +382,9 @@ enum { NFSPROC4_CLNT_READDIR, NFSPROC4_CLNT_SERVER_CAPS, NFSPROC4_CLNT_DELEGRETURN, + NFSPROC4_CLNT_GETACL, + NFSPROC4_CLNT_SETACL, + NFSPROC4_CLNT_FS_LOCATIONS, }; #endif Index: linux-2.6.12-rc3/include/linux/nfs_fs.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/nfs_fs.h +++ linux-2.6.12-rc3/include/linux/nfs_fs.h @@ -15,9 +15,6 @@ #include #include #include -#include - -#include #include #include @@ -28,8 +25,10 @@ #include #include #include + +#include + #include -#include #include /* @@ -44,13 +43,6 @@ #define NFS_DEF_FILE_IO_BUFFER_SIZE 4096 /* - * The upper limit on timeouts for the exponential backoff algorithm. - */ -#define NFS_WRITEBACK_DELAY (5*HZ) -#define NFS_WRITEBACK_LOCKDELAY (60*HZ) -#define NFS_COMMIT_DELAY (5*HZ) - -/* * superblock magic number for NFS */ #define NFS_SUPER_MAGIC 0x6969 @@ -60,9 +52,6 @@ */ #define NFS_RPC_SWAPFLAGS (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) -#define NFS_RW_SYNC 0x0001 /* O_SYNC handling */ -#define NFS_RW_SWAP 0x0002 /* This is a swap request */ - /* * When flushing a cluster of dirty pages, there can be different * strategies: @@ -96,7 +85,8 @@ struct nfs_open_context { int error; struct list_head list; - wait_queue_head_t waitq; + + __u64 dir_cookie; }; /* @@ -104,6 +94,8 @@ struct nfs_open_context { */ struct nfs_delegation; +struct posix_acl; + /* * nfs fs inode data in memory */ @@ -140,7 +132,6 @@ struct nfs_inode { * * mtime != read_cache_mtime */ - unsigned long readdir_timestamp; unsigned long read_cache_jiffies; unsigned long attrtimeo; unsigned long attrtimeo_timestamp; @@ -158,6 +149,10 @@ struct nfs_inode { atomic_t data_updates; struct nfs_access_entry cache_access; +#ifdef CONFIG_NFS_V3_ACL + struct posix_acl *acl_access; + struct posix_acl *acl_default; +#endif /* * This is the cookie verifier used for NFSv3 readdir @@ -183,13 +178,13 @@ struct nfs_inode { wait_queue_head_t nfs_i_wait; #ifdef CONFIG_NFS_V4 + struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ struct list_head open_states; struct nfs_delegation *delegation; int delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ - struct inode vfs_inode; }; @@ -203,6 +198,8 @@ struct nfs_inode { #define NFS_INO_INVALID_DATA 0x0010 /* cached data is invalid */ #define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */ #define NFS_INO_INVALID_ACCESS 0x0040 /* cached access cred invalid */ +#define NFS_INO_INVALID_ACL 0x0080 /* cached acls are invalid */ +#define NFS_INO_REVAL_PAGECACHE 0x1000 /* must revalidate pagecache */ static inline struct nfs_inode *NFS_I(struct inode *inode) { @@ -294,18 +291,24 @@ extern int nfs_release(struct inode *, s extern int nfs_attribute_timeout(struct inode *inode); extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); +extern void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_begin_attr_update(struct inode *); extern void nfs_end_attr_update(struct inode *); extern void nfs_begin_data_update(struct inode *); extern void nfs_end_data_update(struct inode *); -extern void nfs_end_data_update_defer(struct inode *); extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, int mode); extern void nfs_file_clear_open_context(struct file *filp); +extern struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + struct nfs_fh *fh, + struct nfs_fattr *fattr); +extern int nfs_try_migrate_inode(struct inode *dir, struct dentry *parent); +extern int nfs_try_migrate_filehandle(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr, uint32_t generation); /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ extern u32 root_nfs_parse_addr(char *name); /*__init*/ @@ -314,6 +317,9 @@ extern u32 root_nfs_parse_addr(char *nam * linux/fs/nfs/file.c */ extern struct inode_operations nfs_file_inode_operations; +#ifdef CONFIG_NFS_V3 +extern struct inode_operations nfs3_file_inode_operations; +#endif /* CONFIG_NFS_V3 */ extern struct file_operations nfs_file_operations; extern struct address_space_operations nfs_file_aops; @@ -329,6 +335,22 @@ static inline struct rpc_cred *nfs_file_ } /* + * linux/fs/nfs/xattr.c + */ +#ifdef CONFIG_NFS_V3_ACL +extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t); +extern ssize_t nfs3_getxattr(struct dentry *, const char *, void *, size_t); +extern int nfs3_setxattr(struct dentry *, const char *, + const void *, size_t, int); +extern int nfs3_removexattr (struct dentry *, const char *name); +#else +# define nfs3_listxattr NULL +# define nfs3_getxattr NULL +# define nfs3_setxattr NULL +# define nfs3_removexattr NULL +#endif + +/* * linux/fs/nfs/direct.c */ extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t, @@ -342,6 +364,9 @@ extern ssize_t nfs_file_direct_write(str * linux/fs/nfs/dir.c */ extern struct inode_operations nfs_dir_inode_operations; +#ifdef CONFIG_NFS_V3 +extern struct inode_operations nfs3_dir_inode_operations; +#endif /* CONFIG_NFS_V3 */ extern struct file_operations nfs_dir_operations; extern struct dentry_operations nfs_dentry_operations; @@ -353,6 +378,13 @@ extern int nfs_instantiate(struct dentry extern struct inode_operations nfs_symlink_inode_operations; /* + * linux/fs/nfs/namespace.c + */ +extern struct inode_operations nfs_mountpoint_inode_operations; +extern int nfs_mountpoint_expiry_timeout; +extern void nfs_release_automount_timer(void); + +/* * linux/fs/nfs/unlink.c */ extern int nfs_async_unlink(struct dentry *); @@ -377,10 +409,10 @@ extern void nfs_commit_done(struct rpc_t */ extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -extern int nfs_commit_inode(struct inode *, unsigned long, unsigned int, int); +extern int nfs_commit_inode(struct inode *, int); #else static inline int -nfs_commit_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how) +nfs_commit_inode(struct inode *inode, int how) { return 0; } @@ -434,11 +466,6 @@ static inline void nfs_writedata_free(st mempool_free(p, nfs_wdata_mempool); } -/* Hack for future NFS swap support */ -#ifndef IS_SWAPFILE -# define IS_SWAPFILE(inode) (0) -#endif - /* * linux/fs/nfs/read.c */ @@ -468,6 +495,29 @@ static inline void nfs_readdata_free(str extern void nfs_readdata_release(struct rpc_task *task); /* + * linux/fs/nfs3proc.c + */ +#ifdef CONFIG_NFS_V3_ACL +extern struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type); +extern int nfs3_proc_setacl(struct inode *inode, int type, + struct posix_acl *acl); +extern int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, + mode_t mode); +extern void nfs3_forget_cached_acls(struct inode *inode); +#else +static inline int nfs3_proc_set_default_acl(struct inode *dir, + struct inode *inode, + mode_t mode) +{ + return 0; +} + +static inline void nfs3_forget_cached_acls(struct inode *inode) +{ +} +#endif /* CONFIG_NFS_V3_ACL */ + +/* * linux/fs/mount_clnt.c * (Used only by nfsroot module) */ @@ -515,230 +565,6 @@ extern void * nfs_root_data(void); #define NFS_JUKEBOX_RETRY_TIME (5 * HZ) -#ifdef CONFIG_NFS_V4 - -struct idmap; - -/* - * In a seqid-mutating op, this macro controls which error return - * values trigger incrementation of the seqid. - * - * from rfc 3010: - * The client MUST monotonically increment the sequence number for the - * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE - * operations. This is true even in the event that the previous - * operation that used the sequence number received an error. The only - * exception to this rule is if the previous operation received one of - * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID, - * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR, - * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE. - * - */ -#define seqid_mutating_err(err) \ -(((err) != NFSERR_STALE_CLIENTID) && \ - ((err) != NFSERR_STALE_STATEID) && \ - ((err) != NFSERR_BAD_STATEID) && \ - ((err) != NFSERR_BAD_SEQID) && \ - ((err) != NFSERR_BAD_XDR) && \ - ((err) != NFSERR_RESOURCE) && \ - ((err) != NFSERR_NOFILEHANDLE)) - -enum nfs4_client_state { - NFS4CLNT_OK = 0, -}; - -/* - * The nfs4_client identifies our client state to the server. - */ -struct nfs4_client { - struct list_head cl_servers; /* Global list of servers */ - struct in_addr cl_addr; /* Server identifier */ - u64 cl_clientid; /* constant */ - nfs4_verifier cl_confirm; - unsigned long cl_state; - - u32 cl_lockowner_id; - - /* - * The following rwsem ensures exclusive access to the server - * while we recover the state following a lease expiration. - */ - struct rw_semaphore cl_sem; - - struct list_head cl_delegations; - struct list_head cl_state_owners; - struct list_head cl_unused; - int cl_nunused; - spinlock_t cl_lock; - atomic_t cl_count; - - struct rpc_clnt * cl_rpcclient; - struct rpc_cred * cl_cred; - - struct list_head cl_superblocks; /* List of nfs_server structs */ - - unsigned long cl_lease_time; - unsigned long cl_last_renewal; - struct work_struct cl_renewd; - struct work_struct cl_recoverd; - - wait_queue_head_t cl_waitq; - struct rpc_wait_queue cl_rpcwaitq; - - /* used for the setclientid verifier */ - struct timespec cl_boot_time; - - /* idmapper */ - struct idmap * cl_idmap; - - /* Our own IP address, as a null-terminated string. - * This is used to generate the clientid, and the callback address. - */ - char cl_ipaddr[16]; - unsigned char cl_id_uniquifier; -}; - -/* - * NFS4 state_owners and lock_owners are simply labels for ordered - * sequences of RPC calls. Their sole purpose is to provide once-only - * semantics by allowing the server to identify replayed requests. - * - * The ->so_sema is held during all state_owner seqid-mutating operations: - * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize - * so_seqid. - */ -struct nfs4_state_owner { - struct list_head so_list; /* per-clientid list of state_owners */ - struct nfs4_client *so_client; - u32 so_id; /* 32-bit identifier, unique */ - struct semaphore so_sema; - u32 so_seqid; /* protected by so_sema */ - atomic_t so_count; - - struct rpc_cred *so_cred; /* Associated cred */ - struct list_head so_states; - struct list_head so_delegations; -}; - -/* - * struct nfs4_state maintains the client-side state for a given - * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). - * - * OPEN: - * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, - * we need to know how many files are open for reading or writing on a - * given inode. This information too is stored here. - * - * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) - */ - -struct nfs4_lock_state { - struct list_head ls_locks; /* Other lock stateids */ - fl_owner_t ls_owner; /* POSIX lock owner */ -#define NFS_LOCK_INITIALIZED 1 - int ls_flags; - u32 ls_seqid; - u32 ls_id; - nfs4_stateid ls_stateid; - atomic_t ls_count; -}; - -/* bits for nfs4_state->flags */ -enum { - LK_STATE_IN_USE, - NFS_DELEGATED_STATE, -}; - -struct nfs4_state { - struct list_head open_states; /* List of states for the same state_owner */ - struct list_head inode_states; /* List of states for the same inode */ - struct list_head lock_states; /* List of subservient lock stateids */ - - struct nfs4_state_owner *owner; /* Pointer to the open owner */ - struct inode *inode; /* Pointer to the inode */ - - unsigned long flags; /* Do we hold any locks? */ - struct semaphore lock_sema; /* Serializes file locking operations */ - rwlock_t state_lock; /* Protects the lock_states list */ - - nfs4_stateid stateid; - - unsigned int nreaders; - unsigned int nwriters; - int state; /* State on the server (R,W, or RW) */ - atomic_t count; -}; - - -struct nfs4_exception { - long timeout; - int retry; -}; - -struct nfs4_state_recovery_ops { - int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); - int (*recover_lock)(struct nfs4_state *, struct file_lock *); -}; - -extern struct dentry_operations nfs4_dentry_operations; -extern struct inode_operations nfs4_dir_inode_operations; - -/* nfs4proc.c */ -extern int nfs4_map_errors(int err); -extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); -extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); -extern int nfs4_proc_async_renew(struct nfs4_client *); -extern int nfs4_proc_renew(struct nfs4_client *); -extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); -extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); -extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); - -extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; -extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; - -/* nfs4renewd.c */ -extern void nfs4_schedule_state_renewal(struct nfs4_client *); -extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); -extern void nfs4_kill_renewd(struct nfs4_client *); - -/* nfs4state.c */ -extern void init_nfsv4_state(struct nfs_server *); -extern void destroy_nfsv4_state(struct nfs_server *); -extern struct nfs4_client *nfs4_get_client(struct in_addr *); -extern void nfs4_put_client(struct nfs4_client *clp); -extern int nfs4_init_client(struct nfs4_client *clp); -extern struct nfs4_client *nfs4_find_client(struct in_addr *); -extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); - -extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); -extern void nfs4_put_state_owner(struct nfs4_state_owner *); -extern void nfs4_drop_state_owner(struct nfs4_state_owner *); -extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); -extern void nfs4_put_open_state(struct nfs4_state *); -extern void nfs4_close_state(struct nfs4_state *, mode_t); -extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); -extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); -extern void nfs4_schedule_state_recovery(struct nfs4_client *); -extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t); -extern struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t); -extern void nfs4_put_lock_state(struct nfs4_lock_state *state); -extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); -extern void nfs4_notify_setlk(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); -extern void nfs4_notify_unlck(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); -extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); - - - -struct nfs4_mount_data; -#else -#define init_nfsv4_state(server) do { } while (0) -#define destroy_nfsv4_state(server) do { } while (0) -#define nfs4_put_state_owner(inode, owner) do { } while (0) -#define nfs4_put_open_state(state) do { } while (0) -#define nfs4_close_state(a, b) do { } while (0) -#define nfs4_renewd_prepare_shutdown(server) do { } while (0) -#endif - #endif /* __KERNEL__ */ /* Index: linux-2.6.12-rc3/include/linux/nfs_fs_sb.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/nfs_fs_sb.h +++ linux-2.6.12-rc3/include/linux/nfs_fs_sb.h @@ -4,13 +4,17 @@ #include #include +struct nfs_iostats; + /* * NFS client parameters stored in the superblock. */ struct nfs_server { struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ + struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ + struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; int flags; /* various flags */ unsigned int caps; /* server capabilities */ @@ -25,10 +29,14 @@ struct nfs_server { unsigned int acregmax; unsigned int acdirmin; unsigned int acdirmax; + unsigned long retrans_timeo; /* retransmit timeout */ + unsigned int retrans_count; /* number of retransmit tries */ unsigned int namelen; char * hostname; /* remote hostname */ struct nfs_fh fh; struct sockaddr_in addr; + struct nfs_fsid fsid; + uint32_t generation; #ifdef CONFIG_NFS_V4 /* Our own IP address, as a null-terminated string. * This is used to generate the clientid, and the callback address. Index: linux-2.6.12-rc3/include/linux/nfs_mount.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/nfs_mount.h +++ linux-2.6.12-rc3/include/linux/nfs_mount.h @@ -58,6 +58,7 @@ struct nfs_mount_data { #define NFS_MOUNT_KERBEROS 0x0100 /* 3 */ #define NFS_MOUNT_NONLM 0x0200 /* 3 */ #define NFS_MOUNT_BROKEN_SUID 0x0400 /* 4 */ +#define NFS_MOUNT_NOACL 0x0800 /* 4 */ #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ #define NFS_MOUNT_FLAGMASK 0xFFFF Index: linux-2.6.12-rc3/include/linux/nfs_page.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/nfs_page.h +++ linux-2.6.12-rc3/include/linux/nfs_page.h @@ -13,19 +13,25 @@ #include #include #include -#include #include #include #include /* + * Valid flags for the radix tree + */ +#define NFS_PAGE_TAG_DIRTY 0 +#define NFS_PAGE_TAG_WRITEBACK 1 + +/* * Valid flags for a dirty buffer */ #define PG_BUSY 0 #define PG_NEED_COMMIT 1 #define PG_NEED_RESCHED 2 +struct nfs_inode; struct nfs_page { struct list_head wb_list, /* Defines state of page: */ *wb_list_head; /* read/write/commit */ @@ -54,14 +60,17 @@ extern void nfs_clear_request(struct nfs extern void nfs_release_request(struct nfs_page *req); -extern void nfs_list_add_request(struct nfs_page *, struct list_head *); - +extern int nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, + unsigned long idx_start, unsigned int npages); extern int nfs_scan_list(struct list_head *, struct list_head *, unsigned long, unsigned int); extern int nfs_coalesce_requests(struct list_head *, struct list_head *, unsigned int); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); +extern int nfs_set_page_writeback_locked(struct nfs_page *req); +extern void nfs_clear_page_writeback(struct nfs_page *req); + /* * Lock the page of an asynchronous request without incrementing the wb_count @@ -86,6 +95,18 @@ nfs_lock_request(struct nfs_page *req) return 1; } +/** + * nfs_list_add_request - Insert a request into a list + * @req: request + * @head: head of list into which to insert the request. + */ +static inline void +nfs_list_add_request(struct nfs_page *req, struct list_head *head) +{ + list_add_tail(&req->wb_list, head); + req->wb_list_head = head; +} + /** * nfs_list_remove_request - Remove a request from its wb_list @@ -96,10 +117,6 @@ nfs_list_remove_request(struct nfs_page { if (list_empty(&req->wb_list)) return; - if (!NFS_WBACK_BUSY(req)) { - printk(KERN_ERR "NFS: unlocked request attempted removed from list!\n"); - BUG(); - } list_del_init(&req->wb_list); req->wb_list_head = NULL; } Index: linux-2.6.12-rc3/include/linux/nfs_xdr.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/nfs_xdr.h +++ linux-2.6.12-rc3/include/linux/nfs_xdr.h @@ -2,12 +2,21 @@ #define _LINUX_NFS_XDR_H #include +#include -struct nfs4_fsid { - __u64 major; - __u64 minor; +struct nfs_fsid { + uint64_t major; + uint64_t minor; }; +/* + * Helper for checking equality between 2 fsids. + */ +static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid *b) +{ + return a->major == b->major && a->minor == b->minor; +} + struct nfs_fattr { unsigned short valid; /* which fields are valid */ __u64 pre_size; /* pre_op_attr.size */ @@ -29,10 +38,7 @@ struct nfs_fattr { } nfs3; } du; dev_t rdev; - union { - __u64 nfs3; /* also nfs2 */ - struct nfs4_fsid nfs4; - } fsid_u; + struct nfs_fsid fsid; __u64 fileid; struct timespec atime; struct timespec mtime; @@ -326,6 +332,20 @@ struct nfs_setattrargs { const u32 * bitmask; }; +struct nfs_setaclargs { + struct nfs_fh * fh; + size_t acl_len; + unsigned int acl_pgbase; + struct page ** acl_pages; +}; + +struct nfs_getaclargs { + struct nfs_fh * fh; + size_t acl_len; + unsigned int acl_pgbase; + struct page ** acl_pages; +}; + struct nfs_setattrres { struct nfs_fattr * fattr; const struct nfs_server * server; @@ -354,6 +374,20 @@ struct nfs_readdirargs { struct page ** pages; }; +struct nfs3_getaclargs { + struct nfs_fh * fh; + int mask; + struct page ** pages; +}; + +struct nfs3_setaclargs { + struct inode * inode; + int mask; + struct posix_acl * acl_access; + struct posix_acl * acl_default; + struct page ** pages; +}; + struct nfs_diropok { struct nfs_fh * fh; struct nfs_fattr * fattr; @@ -477,6 +511,15 @@ struct nfs3_readdirres { int plus; }; +struct nfs3_getaclres { + struct nfs_fattr * fattr; + int mask; + unsigned int acl_access_count; + unsigned int acl_default_count; + struct posix_acl * acl_access; + struct posix_acl * acl_default; +}; + #ifdef CONFIG_NFS_V4 typedef u64 clientid4; @@ -619,6 +662,30 @@ struct nfs4_server_caps_res { u32 has_symlinks; }; +struct nfs_fs_location { + unsigned int serverlen; + char * server; + unsigned int rootpathlen; + char * rootpath; +}; + +#define NFS_FS_LOCATIONS_MAXENTRIES 10 +struct nfs_fs_locations { + struct nfs_fattr fattr; + const struct nfs_server *server; + unsigned int fs_pathlen; + char * fs_path; + int nlocations; + struct nfs_fs_location locations[NFS_FS_LOCATIONS_MAXENTRIES]; +}; + +struct nfs4_fs_locations_arg { + const struct nfs_fh *dir_fh; + const struct qstr *name; + struct page *page; + const u32 *bitmask; +}; + #endif /* CONFIG_NFS_V4 */ struct nfs_page; @@ -667,6 +734,7 @@ struct nfs_rpc_ops { int version; /* Protocol version */ struct dentry_operations *dentry_ops; struct inode_operations *dir_inode_ops; + struct inode_operations *file_inode_ops; int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); @@ -683,7 +751,7 @@ struct nfs_rpc_ops { int (*write) (struct nfs_write_data *); int (*commit) (struct nfs_write_data *); int (*create) (struct inode *, struct dentry *, - struct iattr *, int); + struct iattr *, int, struct nameidata *); int (*remove) (struct inode *, struct qstr *); int (*unlink_setup) (struct rpc_message *, struct dentry *, struct qstr *); @@ -713,6 +781,7 @@ struct nfs_rpc_ops { int (*file_open) (struct inode *, struct file *); int (*file_release) (struct inode *, struct file *); int (*lock)(struct file *, int, struct file_lock *); + void (*clear_acl_cache)(struct inode *); }; /* @@ -732,4 +801,7 @@ extern struct rpc_version nfs_version2; extern struct rpc_version nfs_version3; extern struct rpc_version nfs_version4; +extern struct rpc_version nfsacl_version3; +extern struct rpc_program nfsacl_program; + #endif Index: linux-2.6.12-rc3/include/linux/nfsacl.h =================================================================== --- /dev/null +++ linux-2.6.12-rc3/include/linux/nfsacl.h @@ -0,0 +1,58 @@ +/* + * File: linux/nfsacl.h + * + * (C) 2003 Andreas Gruenbacher + */ +#ifndef __LINUX_NFSACL_H +#define __LINUX_NFSACL_H + +#define NFS_ACL_PROGRAM 100227 + +#define ACLPROC2_GETACL 1 +#define ACLPROC2_SETACL 2 +#define ACLPROC2_GETATTR 3 +#define ACLPROC2_ACCESS 4 + +#define ACLPROC3_GETACL 1 +#define ACLPROC3_SETACL 2 + + +/* Flags for the getacl/setacl mode */ +#define NFS_ACL 0x0001 +#define NFS_ACLCNT 0x0002 +#define NFS_DFACL 0x0004 +#define NFS_DFACLCNT 0x0008 + +/* Flag for Default ACL entries */ +#define NFS_ACL_DEFAULT 0x1000 + +#ifdef __KERNEL__ + +#include + +/* Maximum number of ACL entries over NFS */ +#define NFS_ACL_MAX_ENTRIES 1024 + +#define NFSACL_MAXWORDS (2*(2+3*NFS_ACL_MAX_ENTRIES)) +#define NFSACL_MAXPAGES ((2*(8+12*NFS_ACL_MAX_ENTRIES) + PAGE_SIZE-1) \ + >> PAGE_SHIFT) + +static inline unsigned int +nfsacl_size(struct posix_acl *acl_access, struct posix_acl *acl_default) +{ + unsigned int w = 16; + w += max(acl_access ? (int)acl_access->a_count : 3, 4) * 12; + if (acl_default) + w += max((int)acl_default->a_count, 4) * 12; + return w; +} + +extern unsigned int +nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, + struct posix_acl *acl, int encode_entries, int typeflag); +extern unsigned int +nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, + struct posix_acl **pacl); + +#endif /* __KERNEL__ */ +#endif /* __LINUX_NFSACL_H */ Index: linux-2.6.12-rc3/include/linux/nfsd/nfsd.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/nfsd/nfsd.h +++ linux-2.6.12-rc3/include/linux/nfsd/nfsd.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -124,6 +125,21 @@ int nfsd_statfs(struct svc_rqst *, stru int nfsd_notify_change(struct inode *, struct iattr *); int nfsd_permission(struct svc_export *, struct dentry *, int); +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +#ifdef CONFIG_NFSD_V2_ACL +extern struct svc_version nfsd_acl_version2; +#else +#define nfsd_acl_version2 NULL +#endif +#ifdef CONFIG_NFSD_V3_ACL +extern struct svc_version nfsd_acl_version3; +#else +#define nfsd_acl_version3 NULL +#endif +struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); +int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); +#endif + /* * NFSv4 State Index: linux-2.6.12-rc3/include/linux/nfsd/xdr.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/nfsd/xdr.h +++ linux-2.6.12-rc3/include/linux/nfsd/xdr.h @@ -169,4 +169,8 @@ int nfssvc_encode_entry(struct readdir_c int nfssvc_release_fhandle(struct svc_rqst *, u32 *, struct nfsd_fhandle *); +/* Helper functions for NFSv2 ACL code */ +u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp); +u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp); + #endif /* LINUX_NFSD_H */ Index: linux-2.6.12-rc3/include/linux/nfsd/xdr3.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/nfsd/xdr3.h +++ linux-2.6.12-rc3/include/linux/nfsd/xdr3.h @@ -110,6 +110,19 @@ struct nfsd3_commitargs { __u32 count; }; +struct nfsd3_getaclargs { + struct svc_fh fh; + int mask; +}; + +struct posix_acl; +struct nfsd3_setaclargs { + struct svc_fh fh; + int mask; + struct posix_acl *acl_access; + struct posix_acl *acl_default; +}; + struct nfsd3_attrstat { __u32 status; struct svc_fh fh; @@ -209,6 +222,14 @@ struct nfsd3_commitres { struct svc_fh fh; }; +struct nfsd3_getaclres { + __u32 status; + struct svc_fh fh; + int mask; + struct posix_acl *acl_access; + struct posix_acl *acl_default; +}; + /* dummy type for release */ struct nfsd3_fhandle_pair { __u32 dummy; @@ -241,6 +262,7 @@ union nfsd3_xdrstore { struct nfsd3_fsinfores fsinfores; struct nfsd3_pathconfres pathconfres; struct nfsd3_commitres commitres; + struct nfsd3_getaclres getaclres; }; #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) @@ -316,6 +338,10 @@ int nfs3svc_encode_entry(struct readdir_ int nfs3svc_encode_entry_plus(struct readdir_cd *, const char *name, int namlen, loff_t offset, ino_t ino, unsigned int); +/* Helper functions for NFSv3 ACL code */ +u32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, + struct svc_fh *fhp); +u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp); #endif /* _LINUX_NFSD_XDR3_H */ Index: linux-2.6.12-rc3/include/linux/sunrpc/auth_gss.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/sunrpc/auth_gss.h +++ linux-2.6.12-rc3/include/linux/sunrpc/auth_gss.h @@ -75,14 +75,17 @@ struct gss_cl_ctx { struct xdr_netobj gc_wire_ctx; u32 gc_win; unsigned long gc_expiry; + char gc_principal[0]; }; struct gss_upcall_msg; +struct key; struct gss_cred { struct rpc_cred gc_base; enum rpc_gss_svc gc_service; struct gss_cl_ctx *gc_ctx; struct gss_upcall_msg *gc_upcall; + struct key *gc_key; }; #define gc_uid gc_base.cr_uid Index: linux-2.6.12-rc3/include/linux/sunrpc/clnt.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/sunrpc/clnt.h +++ linux-2.6.12-rc3/include/linux/sunrpc/clnt.h @@ -9,12 +9,9 @@ #ifndef _LINUX_SUNRPC_CLNT_H #define _LINUX_SUNRPC_CLNT_H -#include -#include -#include #include +#include #include -#include #include #include @@ -66,7 +63,6 @@ struct rpc_clnt { struct rpc_portmap cl_pmap_default; char cl_inline_name[32]; }; -#define cl_timeout cl_xprt->timeout #define cl_prog cl_pmap->pm_prog #define cl_vers cl_pmap->pm_vers #define cl_port cl_pmap->pm_port @@ -103,7 +99,6 @@ struct rpc_procinfo { unsigned int p_timer; /* Which RTT timer to use */ }; -#define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt)) #define RPC_PEERADDR(clnt) (&(clnt)->cl_xprt->addr) #ifdef __KERNEL__ @@ -111,6 +106,11 @@ struct rpc_procinfo { struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *info, u32 version, rpc_authflavor_t authflavor); +struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname, + struct rpc_program *info, + u32 version, rpc_authflavor_t authflavor); +struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, + struct rpc_program *, int); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); int rpc_shutdown_client(struct rpc_clnt *); int rpc_destroy_client(struct rpc_clnt *); @@ -129,6 +129,11 @@ void rpc_clnt_sigmask(struct rpc_clnt * void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); size_t rpc_max_payload(struct rpc_clnt *); +int rpc_ping(struct rpc_clnt *clnt, int flags); + +struct rpc_xprt *rpc_client_get_xprt(struct rpc_clnt *clnt); +void rpc_client_set_xprt(struct rpc_clnt *, struct rpc_xprt *); +void rpc_put_xprt(struct rpc_xprt *xprt); static __inline__ int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) Index: linux-2.6.12-rc3/include/linux/sunrpc/sched.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/sunrpc/sched.h +++ linux-2.6.12-rc3/include/linux/sunrpc/sched.h @@ -31,7 +31,6 @@ struct rpc_wait_queue; struct rpc_wait { struct list_head list; /* wait queue links */ struct list_head links; /* Links to related tasks */ - wait_queue_head_t waitq; /* sync: sleep on this q */ struct rpc_wait_queue * rpc_waitq; /* RPC wait queue we're on */ }; @@ -44,6 +43,7 @@ struct rpc_task { #endif struct list_head tk_task; /* global list of tasks */ struct rpc_clnt * tk_client; /* RPC client */ + struct rpc_xprt * tk_xprt; /* RPC request */ struct rpc_rqst * tk_rqstp; /* RPC request */ int tk_status; /* result of last operation */ @@ -94,7 +94,6 @@ struct rpc_task { #endif }; #define tk_auth tk_client->cl_auth -#define tk_xprt tk_client->cl_xprt /* support walking a list of tasks on a wait queue */ #define task_for_each(task, pos, head) \ Index: linux-2.6.12-rc3/include/linux/sunrpc/svc.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/sunrpc/svc.h +++ linux-2.6.12-rc3/include/linux/sunrpc/svc.h @@ -185,6 +185,17 @@ xdr_ressize_check(struct svc_rqst *rqstp return vec->iov_len <= PAGE_SIZE; } +static inline struct page * +svc_take_res_page(struct svc_rqst *rqstp) +{ + if (rqstp->rq_arghi <= rqstp->rq_argused) + return NULL; + rqstp->rq_arghi--; + rqstp->rq_respages[rqstp->rq_resused] = + rqstp->rq_argpages[rqstp->rq_arghi]; + return rqstp->rq_respages[rqstp->rq_resused++]; +} + static inline int svc_take_page(struct svc_rqst *rqstp) { if (rqstp->rq_arghi <= rqstp->rq_argused) @@ -240,9 +251,10 @@ struct svc_deferred_req { }; /* - * RPC program + * List of RPC programs on the same transport endpoint */ struct svc_program { + struct svc_program * pg_next; /* other programs (same xprt) */ u32 pg_prog; /* program number */ unsigned int pg_lovers; /* lowest version */ unsigned int pg_hivers; /* lowest version */ Index: linux-2.6.12-rc3/include/linux/sunrpc/xdr.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/sunrpc/xdr.h +++ linux-2.6.12-rc3/include/linux/sunrpc/xdr.h @@ -146,7 +146,8 @@ extern void xdr_shift_buf(struct xdr_buf extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, int, int); extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, int); -extern int read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len); +extern int read_bytes_from_xdr_buf(struct xdr_buf *, int, void *, int); +extern int write_bytes_to_xdr_buf(struct xdr_buf *, int, void *, int); /* * Helper structure for copying from an sk_buff. @@ -160,7 +161,7 @@ typedef struct { typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len); -extern void xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, +extern int xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, skb_reader_t *, skb_read_actor_t); struct socket; @@ -168,6 +169,22 @@ struct sockaddr; extern int xdr_sendpages(struct socket *, struct sockaddr *, int, struct xdr_buf *, unsigned int, int); +extern int xdr_encode_word(struct xdr_buf *, int, u32); +extern int xdr_decode_word(struct xdr_buf *, int, u32 *); + +struct xdr_array2_desc; +typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem); +struct xdr_array2_desc { + unsigned int elem_size; + unsigned int array_len; + xdr_xcode_elem_t xcode; +}; + +extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc); +extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc); + /* * Provide some simple tools for XDR buffer overflow-checking etc. */ @@ -186,6 +203,7 @@ extern void xdr_write_pages(struct xdr_s extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p); extern uint32_t *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); +extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); #endif /* __KERNEL__ */ Index: linux-2.6.12-rc3/include/linux/sunrpc/xprt.h =================================================================== --- linux-2.6.12-rc3.orig/include/linux/sunrpc/xprt.h +++ linux-2.6.12-rc3/include/linux/sunrpc/xprt.h @@ -9,7 +9,6 @@ #ifndef _LINUX_SUNRPC_XPRT_H #define _LINUX_SUNRPC_XPRT_H -#include #include #include #include @@ -127,6 +126,7 @@ struct rpc_rqst { #define XPRT_COPY_DATA (1 << 3) struct rpc_xprt { + atomic_t count; /* Reference counter */ struct socket * sock; /* BSD socket layer */ struct sock * inet; /* INET layer */ Index: linux-2.6.12-rc3/lib/Makefile =================================================================== --- linux-2.6.12-rc3.orig/lib/Makefile +++ linux-2.6.12-rc3/lib/Makefile @@ -8,7 +8,7 @@ lib-y := errno.o ctype.o string.o vsprin bitmap.o extable.o kobject_uevent.o prio_tree.o sha1.o \ halfmd4.o -obj-y += sort.o parser.o +obj-y += sort.o parser.o iosem.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG Index: linux-2.6.12-rc3/lib/iosem.c =================================================================== --- /dev/null +++ linux-2.6.12-rc3/lib/iosem.c @@ -0,0 +1,177 @@ +/* + * linux/lib/iosem.c + * + * Copyright (C) 2005 Trond Myklebust + * + * A set of primitives for semaphore-like locks that also support notification + * callbacks for waiters. + */ +#include +#include +#include +#include +#include + +/* + * Common function for requesting an exclusive lock on an iosem + * + * Note: should be called while holding the non-irqsafe spinlock + * lk->wait.lock. The spinlock is non-irqsafe as we have no reason (yet) to + * expect anyone to take/release iosems from within an interrupt + * context (and 'cos it is a _bug_ to attempt to wake up the waitqueue + * lk->wait using anything other than iosem_unlock()). + */ +static inline int __iosem_lock(struct iosem *lk, struct iosem_wait *waiter) +{ + int ret; + + if (lk->state != 0) { + /* The lock cannot be immediately granted: queue waiter */ + waiter->lock = lk; + add_wait_queue_exclusive_locked(&lk->wait, &waiter->wait); + ret = -EINPROGRESS; + } else { + lk->state |= 1 << IOSEM_LOCK_EXCLUSIVE; + ret = 0; + } + return ret; +} + +/** + * iosem_unlock - release an exclusive lock + * @iosem - the iosem on which we hold an exclusive lock + */ +void fastcall iosem_unlock(struct iosem *lk) +{ + spin_lock(&lk->wait.lock); + lk->state &= ~(1 << IOSEM_LOCK_EXCLUSIVE); + wake_up_locked(&lk->wait); + spin_unlock(&lk->wait.lock); +} +EXPORT_SYMBOL(iosem_unlock); + +/** + * iosem_lock_wake_function - take an exclusive lock and wake up sleeping task + * @wait: waitqueue entry. Must be part of an initialized struct iosem_wait + * @mode: + * @sync: + * @key: + * + * Standard wait_queue_func_t callback function used by iosem_lock(). When + * called, it will attempt to wake up the sleeping task, and set an + * exclusive lock on the iosem. + * On success, @wait is automatically removed from the iosem's waitqueue, + * and a non-zero value is returned. + * + * This function will in practice *always* be called from within iosem_unlock() + */ +int iosem_lock_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct iosem_wait *waiter = container_of(wait, struct iosem_wait, wait); + unsigned long *lk_state = &waiter->lock->state; + int ret = 0; + + if (*lk_state == 0) { + ret = default_wake_function(wait, mode, sync, key); + if (ret) { + *lk_state |= 1 << IOSEM_LOCK_EXCLUSIVE; + list_del_init(&wait->task_list); + } + } + return ret; +} + +/** + * iosem_lock - synchronously take an exclusive lock + * @iosem - the iosem to take an exclusive lock + * + * If the exclusive lock cannot be immediately granted, put the current task + * to uninterruptible sleep until it can. + */ +void fastcall iosem_lock(struct iosem *lk) +{ + struct iosem_wait waiter; + + might_sleep(); + + iosem_waiter_init(&waiter); + waiter.wait.func = iosem_lock_wake_function; + + spin_lock(&lk->wait.lock); + if (__iosem_lock(lk, &waiter) != 0) { + /* Must wait for lock... */ + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (list_empty(&waiter.wait.task_list)) + break; + spin_unlock(&lk->wait.lock); + schedule(); + spin_lock(&lk->wait.lock); + } + __set_current_state(TASK_RUNNING); + } + spin_unlock(&lk->wait.lock); +} +EXPORT_SYMBOL(iosem_lock); + +/** + * iosem_lock_and_schedule_function - take an exclusive lock and schedule work + * @wait: waitqueue entry. Must be part of an initialized struct iosem_work + * @mode: unused + * @sync: unused + * @key: unused + * + * Standard wait_queue_func_t callback function used by + * iosem_lock_and_schedule_work. When called, it will attempt to queue the + * work function and set the exclusive lock on the iosem. + * On success, @wait is removed from the iosem's waitqueue, and a non-zero + * value is returned. + * + * This function will in practice *always* be called from within iosem_unlock() + */ +int iosem_lock_and_schedule_function(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct iosem_wait *waiter = container_of(wait, struct iosem_wait, wait); + struct iosem_work *wk = container_of(waiter, struct iosem_work, waiter); + unsigned long *lk_state = &waiter->lock->state; + int ret = 0; + + if (*lk_state == 0) { + ret = schedule_work(&wk->work); + if (ret) { + *lk_state |= 1 << IOSEM_LOCK_EXCLUSIVE; + list_del_init(&wait->task_list); + } + } + return ret; +} + +/** + * iosem_lock_and_schedule_work - request an exclusive lock and schedule work + * @lk: pointer to iosem + * @wk: pointer to iosem_work + * + * Request an exclusive lock on the iosem. If the lock cannot be immediately + * granted, place wk->waiter on the iosem's waitqueue, and return, else + * immediately queue the work function wk->work. + * + * Once the exclusive lock has been granted, the work function described by + * wk->work is queued in keventd. It is then the responsibility of that work + * function to release the exclusive lock once it has been granted. + * + * returns -EINPROGRESS if the lock could not be immediately granted. + */ +int fastcall iosem_lock_and_schedule_work(struct iosem *lk, struct iosem_work *wk) +{ + int ret; + + iosem_waiter_init(&wk->waiter); + wk->waiter.wait.func = iosem_lock_and_schedule_function; + spin_lock(&lk->wait.lock); + ret = __iosem_lock(lk, &wk->waiter); + spin_unlock(&lk->wait.lock); + if (ret == 0) + ret = schedule_work(&wk->work); + return ret; +} +EXPORT_SYMBOL(iosem_lock_and_schedule_work); Index: linux-2.6.12-rc3/net/sunrpc/auth.c =================================================================== --- linux-2.6.12-rc3.orig/net/sunrpc/auth.c +++ linux-2.6.12-rc3/net/sunrpc/auth.c @@ -66,10 +66,10 @@ rpcauth_create(rpc_authflavor_t pseudofl u32 flavor = pseudoflavor_to_flavor(pseudoflavor); if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor])) - return NULL; + return ERR_PTR(-EINVAL); auth = ops->create(clnt, pseudoflavor); - if (!auth) - return NULL; + if (IS_ERR(auth)) + return auth; if (clnt->cl_auth) rpcauth_destroy(clnt->cl_auth); clnt->cl_auth = auth; Index: linux-2.6.12-rc3/net/sunrpc/auth_gss/auth_gss.c =================================================================== --- linux-2.6.12-rc3.orig/net/sunrpc/auth_gss/auth_gss.c +++ linux-2.6.12-rc3/net/sunrpc/auth_gss/auth_gss.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,18 @@ #include #include +#ifdef CONFIG_RPCSEC_GSS_KEYRING + +#include + +#else + +#define gss_key_lookup_cred(auth) ERR_PTR(-ENOKEY) +#define gss_register_keytype() (0) +#define gss_unregister_keytype() do { } while(0) + +#endif + static struct rpc_authops authgss_ops; static struct rpc_credops gss_credops; @@ -83,6 +96,7 @@ static struct rpc_credops gss_credops; static DEFINE_RWLOCK(gss_ctx_lock); struct gss_auth { + spinlock_t lock; struct rpc_auth rpc_auth; struct gss_api_mech *mech; enum rpc_gss_svc service; @@ -90,7 +104,7 @@ struct gss_auth { struct rpc_clnt *client; struct dentry *dentry; char path[48]; - spinlock_t lock; + char key_name[256]; }; static void gss_destroy_ctx(struct gss_cl_ctx *); @@ -221,17 +235,19 @@ gss_cred_get_ctx(struct rpc_cred *cred) } static struct gss_cl_ctx * -gss_alloc_context(void) +gss_alloc_context(const char *principal) { struct gss_cl_ctx *ctx; + size_t len = strlen(principal) + 1; - ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kmalloc(sizeof(*ctx) + len, GFP_KERNEL); if (ctx != NULL) { memset(ctx, 0, sizeof(*ctx)); ctx->gc_proc = RPC_GSS_PROC_DATA; ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ spin_lock_init(&ctx->gc_seq_lock); atomic_set(&ctx->count,1); + memcpy(ctx->gc_principal, principal, len); } return ctx; } @@ -535,6 +551,7 @@ gss_pipe_downcall(struct file *filp, con struct rpc_cred *cred; struct gss_upcall_msg *gss_msg; struct gss_cl_ctx *ctx; + char principal[32]; uid_t uid; int err = -EFBIG; @@ -558,7 +575,8 @@ gss_pipe_downcall(struct file *filp, con } err = -ENOMEM; - ctx = gss_alloc_context(); + snprintf(principal, sizeof(principal), "%u@%s", uid, clnt->cl_server); + ctx = gss_alloc_context(principal); if (ctx == NULL) goto err; err = 0; @@ -660,14 +678,16 @@ gss_create(struct rpc_clnt *clnt, rpc_au { struct gss_auth *gss_auth; struct rpc_auth * auth; + int err = -ENOMEM; /* XXX? */ dprintk("RPC: creating GSS authenticator for client %p\n",clnt); if (!try_module_get(THIS_MODULE)) - return NULL; + return ERR_PTR(err); if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) goto out_dec; gss_auth->client = clnt; + err = -EINVAL; gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); if (!gss_auth->mech) { printk(KERN_WARNING "%s: Pseudoflavor %d not found!", @@ -675,9 +695,8 @@ gss_create(struct rpc_clnt *clnt, rpc_au goto err_free; } gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); - /* FIXME: Will go away once privacy support is merged in */ - if (gss_auth->service == RPC_GSS_SVC_PRIVACY) - gss_auth->service = RPC_GSS_SVC_INTEGRITY; + if (gss_auth->service == 0) + goto err_put_mech; INIT_LIST_HEAD(&gss_auth->upcalls); spin_lock_init(&gss_auth->lock); auth = &gss_auth->rpc_auth; @@ -687,15 +706,25 @@ gss_create(struct rpc_clnt *clnt, rpc_au auth->au_flavor = flavor; atomic_set(&auth->au_count, 1); - if (rpcauth_init_credcache(auth, GSS_CRED_EXPIRE) < 0) + err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE); + if (err) goto err_put_mech; + snprintf(gss_auth->key_name, sizeof(gss_auth->key_name), + "mechanism=\"%s\" service=\"%s%u\" host=\"%s\"", + gss_auth->mech->gm_pfs[gss_auth->service-RPC_GSS_SVC_NONE].name, + clnt->cl_protname, + clnt->cl_vers, + clnt->cl_server); + snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s", clnt->cl_pathname, gss_auth->mech->gm_name); gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); - if (IS_ERR(gss_auth->dentry)) + if (IS_ERR(gss_auth->dentry)) { + err = PTR_ERR(gss_auth->dentry); goto err_put_mech; + } return auth; err_put_mech: @@ -704,7 +733,7 @@ err_free: kfree(gss_auth); out_dec: module_put(THIS_MODULE); - return NULL; + return ERR_PTR(err); } static void @@ -739,6 +768,23 @@ gss_destroy_ctx(struct gss_cl_ctx *ctx) kfree(ctx); } +static inline struct gss_cred * +gss_alloc_cred(struct gss_auth *gss_auth) +{ + struct gss_cred *cred; + + dprintk("RPC: gss_alloc_cred \n"); + + cred = kmalloc(sizeof(*cred), GFP_KERNEL); + if (cred != NULL) { + memset(cred, 0, sizeof(*cred)); + atomic_set(&cred->gc_count, 1); + cred->gc_base.cr_ops = &gss_credops; + cred->gc_service = gss_auth->service; + } + return cred; +} + static void gss_destroy_cred(struct rpc_cred *rc) { @@ -748,16 +794,266 @@ gss_destroy_cred(struct rpc_cred *rc) if (cred->gc_ctx) gss_put_ctx(cred->gc_ctx); + if (cred->gc_key) + key_put(cred->gc_key); kfree(cred); } +#ifdef CONFIG_RPCSEC_GSS_KEYRING +static inline const void * +simple_skip_bytes(const void *p, const void *end, size_t len) +{ + const void *q = (const void *)((const char *)p + len); + if (unlikely(q > end || q < p)) + return ERR_PTR(-EFAULT); + return q; +} + + +static struct gss_cl_ctx * +gss_key_read_context(const void *p, const void *end) +{ + struct gss_cl_ctx *ctx; + struct gss_api_mech *mech; + size_t len, maxlen; + + /* First up should be the name of the mechanism */ + maxlen = end - p; + len = strnlen((const char *)p, maxlen); + if (len == maxlen) + return ERR_PTR(-EFAULT); + /* find the mechanism */ + mech = gss_mech_get_by_name((const char *)p); + if (mech == NULL) + return ERR_PTR(-EINVAL); + p = simple_skip_bytes(p, end, len + 1); + if (IS_ERR(p)) + goto err_put_mech; + /* Next we want the name of the principal */ + maxlen = end - p; + len = strnlen((const char *)p, maxlen); + if (len == maxlen) { + p = ERR_PTR(-EFAULT); + goto err_put_mech; + } + ctx = gss_alloc_context((const char *)p); + p = simple_skip_bytes(p, end, len + 1); + if (IS_ERR(p)) + goto err_free_ctx; + /* Now read in context */ + p = gss_fill_context(p, end, ctx, mech); + if (IS_ERR(p)) + goto err_free_ctx; + return ctx; +err_free_ctx: + kfree(ctx); +err_put_mech: + gss_mech_put(mech); + return (struct gss_cl_ctx *)p; +} + +static int +gss_key_instantiate(struct key *key, const void *p, size_t buflen) +{ + const void *end = (const void *)((const char *)p + buflen); + struct gss_cl_ctx *ctx; + + ctx = gss_key_read_context(p, end); + if (IS_ERR(ctx)) + goto err; + write_lock(&key->lock); + key->payload.data = ctx; + key->expiry = get_seconds() + (ctx->gc_expiry - jiffies)/HZ; + write_unlock(&key->lock); + return 0; +err: + return PTR_ERR(ctx); +} + +static int +gss_key_duplicate(struct key *key, const struct key *source) +{ + struct gss_cl_ctx *ctx = (struct gss_cl_ctx *)source->payload.data; + + if (ctx != NULL) { + gss_mech_get(ctx->gc_gss_ctx->mech_type); + write_lock(&key->lock); + key->payload.data = gss_get_ctx(ctx); + key->expiry = source->expiry; + write_unlock(&key->lock); + } + return 0; +} + +static int +gss_key_update(struct key *key, const void *p, size_t buflen) +{ + const void *end = (const void *)((const char *)p + buflen); + struct gss_cl_ctx *ctx, *old; + + ctx = gss_key_read_context(p, end); + if (IS_ERR(ctx)) + goto err; + write_lock(&key->lock); + old = (struct gss_cl_ctx *) key->payload.data; + key->payload.data = ctx; + key->expiry = get_seconds() + (ctx->gc_expiry - jiffies)/HZ; + write_unlock(&key->lock); + if (old) + gss_put_ctx(ctx); + return 0; +err: + return PTR_ERR(ctx); +} + +static int +gss_key_match(const struct key *key, const void *description) +{ + return key->description != NULL && + strcmp(key->description, description) == 0; +} + +static void +gss_key_destroy(struct key *key) +{ + struct gss_cl_ctx *ctx = (struct gss_cl_ctx *)key->payload.data; + if (ctx != NULL) { + struct gss_api_mech *mech = ctx->gc_gss_ctx->mech_type; + gss_put_ctx(ctx); + gss_mech_put(mech); + } +} + +static void +gss_key_describe(const struct key *key, struct seq_file *m) +{ + struct gss_cl_ctx *ctx = NULL; + + seq_puts(m, key->description); + + if (key->payload.data) + ctx = gss_get_ctx((struct gss_cl_ctx *)key->payload.data); + if (ctx != NULL) { + seq_printf(m, ": %s", ctx->gc_principal); + gss_put_ctx(ctx); + } else + seq_printf(m, ": "); +} + +static struct key_type key_type_rpcsec_context = { + .name = "rpcsec_gss context", + .def_datalen = sizeof(struct gss_cl_ctx) + sizeof(struct gss_ctx), + .instantiate = gss_key_instantiate, + .duplicate = gss_key_duplicate, + .update = gss_key_update, + .match = gss_key_match, + .destroy = gss_key_destroy, + .describe = gss_key_describe, +}; + +static struct key * +gss_request_key(struct gss_auth *gss_auth) +{ + struct key *key; + struct rpc_clnt *clnt = gss_auth->client; + char args[384]; + + snprintf(args, sizeof(args), "%s ip=\"%u.%u.%u.%u\" port=\"%u\" proto=\"%s\"", + gss_auth->key_name, + NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr), + clnt->cl_port, + clnt->cl_prot == IPPROTO_TCP ? "tcp" : "udp"); + dprintk("%s: requesting key %s with args %s\n", __FUNCTION__, + gss_auth->key_name, args); + + key = request_key(&key_type_rpcsec_context, gss_auth->key_name, args); + if (IS_ERR(key)) + goto out_err; + dprintk("%s: returned success\n", __FUNCTION__); + return key; +out_err: + dprintk("%s: returned error %ld\n", __FUNCTION__, -PTR_ERR(key)); + return key; +} + + +static inline struct gss_cl_ctx * +gss_key_lookup_context(struct key *key) +{ + struct gss_cl_ctx *ctx = ERR_PTR(-ENOKEY); + + read_lock(&key->lock); + if (key->payload.data != NULL) + ctx = gss_get_ctx((struct gss_cl_ctx *)key->payload.data); + read_unlock(&key->lock); + return ctx; +} + +static inline struct rpc_cred * +gss_key_lookup_cred(struct rpc_auth *auth) +{ + struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); + struct gss_cred *gss_cred; + struct gss_cl_ctx *ctx; + struct key *key; + void *err; + + err = key = gss_request_key(gss_auth); + if (IS_ERR(key)) + goto out_no_key; + err = ctx = gss_key_lookup_context(key); + if (IS_ERR(ctx)) + goto out_put_key; + gss_cred = gss_alloc_cred(gss_auth); + if (gss_cred == NULL) + goto out_no_cred; + gss_cred_set_ctx(&gss_cred->gc_base, ctx); + gss_cred->gc_key = key; + return &gss_cred->gc_base; +out_no_cred: + err = ERR_PTR(-ENOMEM); +out_put_key: + key_put(key); +out_no_key: + return (struct rpc_cred *)err; +} + +static inline int +gss_register_keytype(void) +{ + return register_key_type(&key_type_rpcsec_context); +} + +static inline void +gss_unregister_keytype(void) +{ + unregister_key_type(&key_type_rpcsec_context); +} +#endif + /* * Lookup RPCSEC_GSS cred for the current process */ static struct rpc_cred * gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags) { - return rpcauth_lookup_credcache(auth, acred, taskflags); + struct rpc_cred *cred; + + /* Try to use the keyring upcall first */ + cred = gss_key_lookup_cred(auth); + if (!IS_ERR(cred)) + goto out; + switch (PTR_ERR(cred)) { + case -EKEYREVOKED: + case -EKEYEXPIRED: + /* Translate into EACCES */ + cred = ERR_PTR(-EACCES); + break; + case -ENOKEY: + cred = rpcauth_lookup_credcache(auth, acred, taskflags); + }; +out: + return cred; } static struct rpc_cred * @@ -770,25 +1066,14 @@ gss_create_cred(struct rpc_auth *auth, s dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", acred->uid, auth->au_flavor); - if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) + cred = gss_alloc_cred(gss_auth); + if (cred == NULL) goto out_err; - - memset(cred, 0, sizeof(*cred)); - atomic_set(&cred->gc_count, 1); cred->gc_uid = acred->uid; - /* - * Note: in order to force a call to call_refresh(), we deliberately - * fail to flag the credential as RPCAUTH_CRED_UPTODATE. - */ - cred->gc_flags = 0; - cred->gc_base.cr_ops = &gss_credops; - cred->gc_service = gss_auth->service; err = gss_create_upcall(gss_auth, cred); if (err < 0) goto out_err; - return &cred->gc_base; - out_err: dprintk("RPC: gss_create_cred failed with error %d\n", err); if (cred) gss_destroy_cred(&cred->gc_base); @@ -1134,7 +1419,12 @@ static int __init init_rpcsec_gss(void) err = gss_svc_init(); if (err) goto out_unregister; + err = gss_register_keytype(); + if (err) + goto out_shutdown_svc; return 0; +out_shutdown_svc: + gss_svc_shutdown(); out_unregister: rpcauth_unregister(&authgss_ops); out: @@ -1143,6 +1433,7 @@ out: static void __exit exit_rpcsec_gss(void) { + gss_unregister_keytype(); gss_svc_shutdown(); rpcauth_unregister(&authgss_ops); } Index: linux-2.6.12-rc3/net/sunrpc/clnt.c =================================================================== --- linux-2.6.12-rc3.orig/net/sunrpc/clnt.c +++ linux-2.6.12-rc3/net/sunrpc/clnt.c @@ -97,12 +97,13 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, * made to sleep too long. */ struct rpc_clnt * -rpc_create_client(struct rpc_xprt *xprt, char *servname, +rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor) { struct rpc_version *version; struct rpc_clnt *clnt = NULL; + struct rpc_auth *auth; int err; int len; @@ -157,10 +158,11 @@ rpc_create_client(struct rpc_xprt *xprt, if (err < 0) goto out_no_path; - err = -ENOMEM; - if (!rpcauth_create(flavor, clnt)) { + auth = rpcauth_create(flavor, clnt); + if (IS_ERR(auth)) { printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n", flavor); + err = PTR_ERR(auth); goto out_no_auth; } @@ -178,6 +180,37 @@ out_no_path: kfree(clnt->cl_server); kfree(clnt); out_err: + rpc_put_xprt(xprt); + return ERR_PTR(err); +} + +/** + * Create an RPC client + * @xprt - pointer to xprt struct + * @servname - name of server + * @info - rpc_program + * @version - rpc_program version + * @authflavor - rpc_auth flavour to use + * + * Creates an RPC client structure, then pings the server in order to + * determine if it is up, and if it supports this program and version. + * + * This function should never be called by asynchronous tasks such as + * the portmapper. + */ +struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, + struct rpc_program *info, u32 version, rpc_authflavor_t authflavor) +{ + struct rpc_clnt *clnt; + int err; + + clnt = rpc_new_client(xprt, servname, info, version, authflavor); + if (IS_ERR(clnt)) + return clnt; + err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); + if (err == 0) + return clnt; + rpc_shutdown_client(clnt); return ERR_PTR(err); } @@ -208,6 +241,8 @@ rpc_clone_client(struct rpc_clnt *clnt) rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); + new->cl_pmap = &new->cl_pmap_default; + rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait"); return new; out_no_clnt: printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); @@ -270,7 +305,7 @@ rpc_destroy_client(struct rpc_clnt *clnt if (clnt->cl_pathname[0]) rpc_rmdir(clnt->cl_pathname); if (clnt->cl_xprt) { - xprt_destroy(clnt->cl_xprt); + rpc_put_xprt(clnt->cl_xprt); clnt->cl_xprt = NULL; } if (clnt->cl_server != clnt->cl_inline_name) @@ -296,6 +331,44 @@ rpc_release_client(struct rpc_clnt *clnt rpc_destroy_client(clnt); } +/** + * rpc_bind_new_program - bind a new RPC program to an existing client + * @old - old rpc_client + * @program - rpc program to set + * @vers - rpc program version + * + * Clones the rpc client and sets up a new RPC program. This is mainly + * of use for enabling different RPC programs to share the same transport. + * The Sun NFSv2/v3 ACL protocol can do this. + */ +struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, + struct rpc_program *program, + int vers) +{ + struct rpc_clnt *clnt; + struct rpc_version *version; + int err; + + BUG_ON(vers >= program->nrvers || !program->version[vers]); + version = program->version[vers]; + clnt = rpc_clone_client(old); + if (IS_ERR(clnt)) + goto out; + clnt->cl_procinfo = version->procs; + clnt->cl_maxproc = version->nrprocs; + clnt->cl_protname = program->name; + clnt->cl_prog = program->number; + clnt->cl_vers = version->number; + clnt->cl_stats = program->stats; + err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); + if (err != 0) { + rpc_shutdown_client(clnt); + clnt = ERR_PTR(err); + } +out: + return clnt; +} + /* * Default callback for async RPC calls */ @@ -377,6 +450,41 @@ out: return status; } +/** + * rpc_client_get_xprt() - Get reference to the RPC transport struct + * @clnt - pointer to RPC client + */ +struct rpc_xprt *rpc_client_get_xprt(struct rpc_clnt *clnt) +{ + struct rpc_xprt *xprt; + + /* Synchronize w.r.t. rpc_client_set_xprt() */ + rcu_read_lock(); + xprt = clnt->cl_xprt; + atomic_inc(&xprt->count); + rcu_read_unlock(); + return xprt; +} + +/** + * rpc_client_set_xprt() - Change the transport struct pointer on an in-use RPC client + * @clnt - pointer to RPC client + * @xprt - new transport + * + * This function should be called VERY infrequently, and is designed + * to be called only in case of a failover mount. + */ +void rpc_client_set_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) +{ + struct rpc_xprt *old; + + old = xchg(&clnt->cl_xprt, xprt); + /* Wait for all reads of clnt->cl_xprt == old to complete */ + synchronize_kernel(); + rpc_put_xprt(old); +} +EXPORT_SYMBOL(rpc_client_set_xprt); + /* * New rpc_call implementation */ @@ -440,7 +548,7 @@ rpc_call_setup(struct rpc_task *task, st void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) { - struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_xprt *xprt = rpc_client_get_xprt(clnt); xprt->sndsize = 0; if (sndsize) @@ -450,6 +558,7 @@ rpc_setbufsize(struct rpc_clnt *clnt, un xprt->rcvsize = rcvsize + RPC_SLACK_SPACE; if (xprt_connected(xprt)) xprt_sock_setbufsize(xprt); + rpc_put_xprt(xprt); } /* @@ -658,7 +767,7 @@ static void call_bind(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_xprt *xprt = task->tk_xprt; dprintk("RPC: %4d call_bind xprt %p %s connected\n", task->tk_pid, xprt, (xprt_connected(xprt) ? "is" : "is not")); @@ -678,12 +787,10 @@ call_bind(struct rpc_task *task) static void call_connect(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; - dprintk("RPC: %4d call_connect status %d\n", task->tk_pid, task->tk_status); - if (xprt_connected(clnt->cl_xprt)) { + if (xprt_connected(task->tk_xprt)) { task->tk_action = call_transmit; return; } @@ -944,7 +1051,7 @@ static u32 * call_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; u32 *p = req->rq_svec[0].iov_base; @@ -957,7 +1064,9 @@ call_header(struct rpc_task *task) *p++ = htonl(clnt->cl_prog); /* program number */ *p++ = htonl(clnt->cl_vers); /* program version */ *p++ = htonl(task->tk_msg.rpc_proc->p_proc); /* procedure */ - return rpcauth_marshcred(task, p); + p = rpcauth_marshcred(task, p); + req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p); + return p; } /* @@ -986,10 +1095,11 @@ call_verify(struct rpc_task *task) case RPC_AUTH_ERROR: break; case RPC_MISMATCH: - printk(KERN_WARNING "%s: RPC call version mismatch!\n", __FUNCTION__); - goto out_eio; + dprintk("%s: RPC call version mismatch!\n", __FUNCTION__); + error = -EPROTONOSUPPORT; + goto out_err; default: - printk(KERN_WARNING "%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n); + dprintk("%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n); goto out_eio; } if (--len < 0) @@ -1040,23 +1150,26 @@ call_verify(struct rpc_task *task) case RPC_SUCCESS: return p; case RPC_PROG_UNAVAIL: - printk(KERN_WARNING "RPC: call_verify: program %u is unsupported by server %s\n", + dprintk("RPC: call_verify: program %u is unsupported by server %s\n", (unsigned int)task->tk_client->cl_prog, task->tk_client->cl_server); - goto out_eio; + error = -EPFNOSUPPORT; + goto out_err; case RPC_PROG_MISMATCH: - printk(KERN_WARNING "RPC: call_verify: program %u, version %u unsupported by server %s\n", + dprintk("RPC: call_verify: program %u, version %u unsupported by server %s\n", (unsigned int)task->tk_client->cl_prog, (unsigned int)task->tk_client->cl_vers, task->tk_client->cl_server); - goto out_eio; + error = -EPROTONOSUPPORT; + goto out_err; case RPC_PROC_UNAVAIL: - printk(KERN_WARNING "RPC: call_verify: proc %p unsupported by program %u, version %u on server %s\n", + dprintk("RPC: call_verify: proc %p unsupported by program %u, version %u on server %s\n", task->tk_msg.rpc_proc, task->tk_client->cl_prog, task->tk_client->cl_vers, task->tk_client->cl_server); - goto out_eio; + error = -EOPNOTSUPP; + goto out_err; case RPC_GARBAGE_ARGS: dprintk("RPC: %4d %s: server saw garbage\n", task->tk_pid, __FUNCTION__); break; /* retry */ @@ -1069,7 +1182,7 @@ out_retry: task->tk_client->cl_stats->rpcgarbage++; if (task->tk_garb_retry) { task->tk_garb_retry--; - dprintk(KERN_WARNING "RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid); + dprintk("RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid); task->tk_action = call_bind; return NULL; } @@ -1083,3 +1196,30 @@ out_overflow: printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__); goto out_retry; } + +static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj) +{ + return 0; +} + +static int rpcproc_decode_null(void *rqstp, u32 *data, void *obj) +{ + return 0; +} + +static struct rpc_procinfo rpcproc_null = { + .p_encode = rpcproc_encode_null, + .p_decode = rpcproc_decode_null, +}; + +int rpc_ping(struct rpc_clnt *clnt, int flags) +{ + struct rpc_message msg = { + .rpc_proc = &rpcproc_null, + }; + int err; + msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0); + err = rpc_call_sync(clnt, &msg, flags); + put_rpccred(msg.rpc_cred); + return err; +} Index: linux-2.6.12-rc3/net/sunrpc/pmap_clnt.c =================================================================== --- linux-2.6.12-rc3.orig/net/sunrpc/pmap_clnt.c +++ linux-2.6.12-rc3/net/sunrpc/pmap_clnt.c @@ -39,7 +39,7 @@ void rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) { struct rpc_portmap *map = clnt->cl_pmap; - struct sockaddr_in *sap = &clnt->cl_xprt->addr; + struct sockaddr_in *sap = &task->tk_xprt->addr; struct rpc_message msg = { .rpc_proc = &pmap_procedures[PMAP_GETPORT], .rpc_argp = map, @@ -53,6 +53,9 @@ rpc_getport(struct rpc_task *task, struc task->tk_pid, clnt->cl_server, map->pm_prog, map->pm_vers, map->pm_prot); + /* Autobind on cloned rpc clients is discouraged */ + BUG_ON(clnt->cl_parent != clnt); + spin_lock(&pmap_lock); if (map->pm_binding) { rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL); @@ -207,12 +210,10 @@ pmap_create(char *hostname, struct socka xprt->addr.sin_port = htons(RPC_PMAP_PORT); /* printk("pmap: create clnt\n"); */ - clnt = rpc_create_client(xprt, hostname, + clnt = rpc_new_client(xprt, hostname, &pmap_program, RPC_PMAP_VERSION, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) { - xprt_destroy(xprt); - } else { + if (!IS_ERR(clnt)) { clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; Index: linux-2.6.12-rc3/net/sunrpc/rpc_pipe.c =================================================================== --- linux-2.6.12-rc3.orig/net/sunrpc/rpc_pipe.c +++ linux-2.6.12-rc3/net/sunrpc/rpc_pipe.c @@ -291,14 +291,16 @@ static int rpc_show_info(struct seq_file *m, void *v) { struct rpc_clnt *clnt = m->private; + struct rpc_xprt *xprt = rpc_client_get_xprt(clnt); seq_printf(m, "RPC server: %s\n", clnt->cl_server); seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, clnt->cl_prog, clnt->cl_vers); seq_printf(m, "address: %u.%u.%u.%u\n", - NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr)); + NIPQUAD(xprt->addr.sin_addr.s_addr)); seq_printf(m, "protocol: %s\n", - clnt->cl_xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + rpc_put_xprt(xprt); return 0; } Index: linux-2.6.12-rc3/net/sunrpc/sched.c =================================================================== --- linux-2.6.12-rc3.orig/net/sunrpc/sched.c +++ linux-2.6.12-rc3/net/sunrpc/sched.c @@ -290,7 +290,7 @@ static void rpc_make_runnable(struct rpc return; } } else - wake_up(&task->u.tk_wait.waitq); + wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED); } /* @@ -554,6 +554,14 @@ __rpc_atrun(struct rpc_task *task) rpc_wake_up_task(task); } +static int rpc_wait_bit_interruptible(void *word) +{ + if (signal_pending(current)) + return -ERESTARTSYS; + schedule(); + return 0; +} + /* * This is the RPC `scheduler' (or rather, the finite state machine). */ @@ -624,22 +632,21 @@ static int __rpc_execute(struct rpc_task /* sync task: sleep here */ dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid); - if (RPC_TASK_UNINTERRUPTIBLE(task)) { - __wait_event(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task)); - } else { - __wait_event_interruptible(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task), status); + /* Note: Caller should be using rpc_clnt_sigmask() */ + status = out_of_line_wait_on_bit(&task->tk_runstate, + RPC_TASK_QUEUED, rpc_wait_bit_interruptible, + TASK_INTERRUPTIBLE); + if (status == -ERESTARTSYS) { /* * When a sync task receives a signal, it exits with * -ERESTARTSYS. In order to catch any callbacks that * clean up after sleeping on some queue, we don't * break the loop here, but go around once more. */ - if (status == -ERESTARTSYS) { - dprintk("RPC: %4d got signal\n", task->tk_pid); - task->tk_flags |= RPC_TASK_KILLED; - rpc_exit(task, -ERESTARTSYS); - rpc_wake_up_task(task); - } + dprintk("RPC: %4d got signal\n", task->tk_pid); + task->tk_flags |= RPC_TASK_KILLED; + rpc_exit(task, -ERESTARTSYS); + rpc_wake_up_task(task); } rpc_set_running(task); dprintk("RPC: %4d sync task resuming\n", task->tk_pid); @@ -759,8 +766,6 @@ void rpc_init_task(struct rpc_task *task /* Initialize workqueue for async tasks */ task->tk_workqueue = rpciod_workqueue; - if (!RPC_IS_ASYNC(task)) - init_waitqueue_head(&task->u.tk_wait.waitq); if (clnt) { atomic_inc(&clnt->cl_users); @@ -768,6 +773,7 @@ void rpc_init_task(struct rpc_task *task task->tk_flags |= RPC_TASK_SOFT; if (!clnt->cl_intr) task->tk_flags |= RPC_TASK_NOINTR; + task->tk_xprt = rpc_client_get_xprt(clnt); } #ifdef RPC_DEBUG @@ -857,6 +863,7 @@ void rpc_release_task(struct rpc_task *t rpcauth_unbindcred(task); rpc_free(task); if (task->tk_client) { + rpc_put_xprt(task->tk_xprt); rpc_release_client(task->tk_client); task->tk_client = NULL; } Index: linux-2.6.12-rc3/net/sunrpc/sunrpc_syms.c =================================================================== --- linux-2.6.12-rc3.orig/net/sunrpc/sunrpc_syms.c +++ linux-2.6.12-rc3/net/sunrpc/sunrpc_syms.c @@ -42,6 +42,7 @@ EXPORT_SYMBOL(rpc_release_task); /* RPC client functions */ EXPORT_SYMBOL(rpc_create_client); EXPORT_SYMBOL(rpc_clone_client); +EXPORT_SYMBOL(rpc_bind_new_program); EXPORT_SYMBOL(rpc_destroy_client); EXPORT_SYMBOL(rpc_shutdown_client); EXPORT_SYMBOL(rpc_release_client); @@ -61,7 +62,6 @@ EXPORT_SYMBOL(rpc_mkpipe); /* Client transport */ EXPORT_SYMBOL(xprt_create_proto); -EXPORT_SYMBOL(xprt_destroy); EXPORT_SYMBOL(xprt_set_timeout); EXPORT_SYMBOL(xprt_udp_slot_table_entries); EXPORT_SYMBOL(xprt_tcp_slot_table_entries); @@ -129,6 +129,10 @@ EXPORT_SYMBOL(xdr_encode_netobj); EXPORT_SYMBOL(xdr_encode_pages); EXPORT_SYMBOL(xdr_inline_pages); EXPORT_SYMBOL(xdr_shift_buf); +EXPORT_SYMBOL(xdr_encode_word); +EXPORT_SYMBOL(xdr_decode_word); +EXPORT_SYMBOL(xdr_encode_array2); +EXPORT_SYMBOL(xdr_decode_array2); EXPORT_SYMBOL(xdr_buf_from_iov); EXPORT_SYMBOL(xdr_buf_subsegment); EXPORT_SYMBOL(xdr_buf_read_netobj); Index: linux-2.6.12-rc3/net/sunrpc/svc.c =================================================================== --- linux-2.6.12-rc3.orig/net/sunrpc/svc.c +++ linux-2.6.12-rc3/net/sunrpc/svc.c @@ -35,20 +35,24 @@ svc_create(struct svc_program *prog, uns if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL))) return NULL; memset(serv, 0, sizeof(*serv)); + serv->sv_name = prog->pg_name; serv->sv_program = prog; serv->sv_nrthreads = 1; serv->sv_stats = prog->pg_stats; serv->sv_bufsz = bufsize? bufsize : 4096; - prog->pg_lovers = prog->pg_nvers-1; xdrsize = 0; - for (vers=0; verspg_nvers ; vers++) - if (prog->pg_vers[vers]) { - prog->pg_hivers = vers; - if (prog->pg_lovers > vers) - prog->pg_lovers = vers; - if (prog->pg_vers[vers]->vs_xdrsize > xdrsize) - xdrsize = prog->pg_vers[vers]->vs_xdrsize; - } + while (prog) { + prog->pg_lovers = prog->pg_nvers-1; + for (vers=0; verspg_nvers ; vers++) + if (prog->pg_vers[vers]) { + prog->pg_hivers = vers; + if (prog->pg_lovers > vers) + prog->pg_lovers = vers; + if (prog->pg_vers[vers]->vs_xdrsize > xdrsize) + xdrsize = prog->pg_vers[vers]->vs_xdrsize; + } + prog = prog->pg_next; + } serv->sv_xdrsize = xdrsize; INIT_LIST_HEAD(&serv->sv_threads); INIT_LIST_HEAD(&serv->sv_sockets); @@ -56,8 +60,6 @@ svc_create(struct svc_program *prog, uns INIT_LIST_HEAD(&serv->sv_permsocks); spin_lock_init(&serv->sv_lock); - serv->sv_name = prog->pg_name; - /* Remove any stale portmap registrations */ svc_register(serv, 0, 0); @@ -281,6 +283,7 @@ svc_process(struct svc_serv *serv, struc rqstp->rq_res.len = 0; rqstp->rq_res.page_base = 0; rqstp->rq_res.page_len = 0; + rqstp->rq_res.buflen = PAGE_SIZE; rqstp->rq_res.tail[0].iov_len = 0; /* tcp needs a space for the record length... */ if (rqstp->rq_prot == IPPROTO_TCP) @@ -338,7 +341,10 @@ svc_process(struct svc_serv *serv, struc goto sendit; } - if (prog != progp->pg_prog) + for (progp = serv->sv_program; progp; progp = progp->pg_next) + if (prog == progp->pg_prog) + break; + if (progp == NULL) goto err_bad_prog; if (vers >= progp->pg_nvers || @@ -451,11 +457,7 @@ err_bad_auth: goto sendit; err_bad_prog: -#ifdef RPC_PARANOIA - if (prog != 100227 || progp->pg_prog != 100003) - printk("svc: unknown program %d (me %d)\n", prog, progp->pg_prog); - /* else it is just a Solaris client seeing if ACLs are supported */ -#endif + dprintk("svc: unknown program %d\n", prog); serv->sv_stats->rpcbadfmt++; svc_putu32(resv, rpc_prog_unavail); goto sendit; Index: linux-2.6.12-rc3/net/sunrpc/xdr.c =================================================================== --- linux-2.6.12-rc3.orig/net/sunrpc/xdr.c +++ linux-2.6.12-rc3/net/sunrpc/xdr.c @@ -176,7 +176,7 @@ xdr_inline_pages(struct xdr_buf *xdr, un xdr->buflen += len; } -void +int xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor) @@ -190,7 +190,7 @@ xdr_partial_copy_from_skb(struct xdr_buf len -= base; ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); if (ret != len || !desc->count) - return; + return 0; base = 0; } else base -= len; @@ -210,6 +210,14 @@ xdr_partial_copy_from_skb(struct xdr_buf do { char *kaddr; + /* ACL likes to be lazy in allocating pages - ACLs + * are small by default but can get huge. */ + if (unlikely(*ppage == NULL)) { + *ppage = alloc_page(GFP_ATOMIC); + if (unlikely(*ppage == NULL)) + return -ENOMEM; + } + len = PAGE_CACHE_SIZE; kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); if (base) { @@ -226,13 +234,15 @@ xdr_partial_copy_from_skb(struct xdr_buf flush_dcache_page(*ppage); kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); if (ret != len || !desc->count) - return; + return 0; ppage++; } while ((pglen -= len) != 0); copy_tail: len = xdr->tail[0].iov_len; if (base < len) copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); + + return 0; } @@ -616,12 +626,24 @@ xdr_shift_buf(struct xdr_buf *buf, size_ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) { struct kvec *iov = buf->head; + int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; + BUG_ON(scratch_len < 0); xdr->buf = buf; xdr->iov = iov; - xdr->end = (uint32_t *)((char *)iov->iov_base + iov->iov_len); - buf->len = iov->iov_len = (char *)p - (char *)iov->iov_base; - xdr->p = p; + xdr->p = (uint32_t *)((char *)iov->iov_base + iov->iov_len); + xdr->end = (uint32_t *)((char *)iov->iov_base + scratch_len); + BUG_ON(iov->iov_len > scratch_len); + + if (p != xdr->p && p != NULL) { + size_t len; + + BUG_ON(p < xdr->p || p > xdr->end); + len = (char *)p - (char *)xdr->p; + xdr->p = p; + buf->len += len; + iov->iov_len += len; + } } EXPORT_SYMBOL(xdr_init_encode); @@ -736,8 +758,7 @@ EXPORT_SYMBOL(xdr_inline_decode); * * Moves data beyond the current pointer position from the XDR head[] buffer * into the page list. Any data that lies beyond current position + "len" - * bytes is moved into the XDR tail[]. The current pointer is then - * repositioned at the beginning of the XDR tail. + * bytes is moved into the XDR tail[]. */ void xdr_read_pages(struct xdr_stream *xdr, unsigned int len) { @@ -774,6 +795,31 @@ void xdr_read_pages(struct xdr_stream *x } EXPORT_SYMBOL(xdr_read_pages); +/** + * xdr_enter_page - decode data from the XDR page + * @xdr: pointer to xdr_stream struct + * @len: number of bytes of page data + * + * Moves data beyond the current pointer position from the XDR head[] buffer + * into the page list. Any data that lies beyond current position + "len" + * bytes is moved into the XDR tail[]. The current pointer is then + * repositioned at the beginning of the first XDR page. + */ +void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) +{ + char * kaddr = page_address(xdr->buf->pages[0]); + xdr_read_pages(xdr, len); + /* + * Position current pointer at beginning of tail, and + * set remaining message length. + */ + if (len > PAGE_CACHE_SIZE - xdr->buf->page_base) + len = PAGE_CACHE_SIZE - xdr->buf->page_base; + xdr->p = (uint32_t *)(kaddr + xdr->buf->page_base); + xdr->end = (uint32_t *)((char *)xdr->p + len); +} +EXPORT_SYMBOL(xdr_enter_page); + static struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0}; void @@ -859,8 +905,34 @@ out: return status; } -static int -read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) +/* obj is assumed to point to allocated memory of size at least len: */ +int +write_bytes_to_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len) +{ + struct xdr_buf subbuf; + int this_len; + int status; + + status = xdr_buf_subsegment(buf, &subbuf, base, len); + if (status) + goto out; + this_len = min(len, (int)subbuf.head[0].iov_len); + memcpy(subbuf.head[0].iov_base, obj, this_len); + len -= this_len; + obj += this_len; + this_len = min(len, (int)subbuf.page_len); + if (this_len) + _copy_to_pages(subbuf.pages, subbuf.page_base, obj, this_len); + len -= this_len; + obj += this_len; + this_len = min(len, (int)subbuf.tail[0].iov_len); + memcpy(subbuf.tail[0].iov_base, obj, this_len); +out: + return status; +} + +int +xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj) { u32 raw; int status; @@ -872,6 +944,14 @@ read_u32_from_xdr_buf(struct xdr_buf *bu return 0; } +int +xdr_encode_word(struct xdr_buf *buf, int base, u32 obj) +{ + u32 raw = htonl(obj); + + return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj)); +} + /* If the netobj starting offset bytes from the start of xdr_buf is contained * entirely in the head or the tail, set object to point to it; otherwise * try to find space for it at the end of the tail, copy it there, and @@ -882,7 +962,7 @@ xdr_buf_read_netobj(struct xdr_buf *buf, u32 tail_offset = buf->head[0].iov_len + buf->page_len; u32 obj_end_offset; - if (read_u32_from_xdr_buf(buf, offset, &obj->len)) + if (xdr_decode_word(buf, offset, &obj->len)) goto out; obj_end_offset = offset + 4 + obj->len; @@ -915,3 +995,219 @@ xdr_buf_read_netobj(struct xdr_buf *buf, out: return -1; } + +/* Returns 0 on success, or else a negative error code. */ +static int +xdr_xcode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc, int encode) +{ + char *elem = NULL, *c; + unsigned int copied = 0, todo, avail_here; + struct page **ppages = NULL; + int err; + + if (encode) { + if (xdr_encode_word(buf, base, desc->array_len) != 0) + return -EINVAL; + } else { + if (xdr_decode_word(buf, base, &desc->array_len) != 0 || + (unsigned long) base + 4 + desc->array_len * + desc->elem_size > buf->len) + return -EINVAL; + } + base += 4; + + if (!desc->xcode) + return 0; + + todo = desc->array_len * desc->elem_size; + + /* process head */ + if (todo && base < buf->head->iov_len) { + c = buf->head->iov_base + base; + avail_here = min_t(unsigned int, todo, + buf->head->iov_len - base); + todo -= avail_here; + + while (avail_here >= desc->elem_size) { + err = desc->xcode(desc, c); + if (err) + goto out; + c += desc->elem_size; + avail_here -= desc->elem_size; + } + if (avail_here) { + if (!elem) { + elem = kmalloc(desc->elem_size, GFP_KERNEL); + err = -ENOMEM; + if (!elem) + goto out; + } + if (encode) { + err = desc->xcode(desc, elem); + if (err) + goto out; + memcpy(c, elem, avail_here); + } else + memcpy(elem, c, avail_here); + copied = avail_here; + } + base = buf->head->iov_len; /* align to start of pages */ + } + + /* process pages array */ + base -= buf->head->iov_len; + if (todo && base < buf->page_len) { + unsigned int avail_page; + + avail_here = min(todo, buf->page_len - base); + todo -= avail_here; + + base += buf->page_base; + ppages = buf->pages + (base >> PAGE_CACHE_SHIFT); + base &= ~PAGE_CACHE_MASK; + avail_page = min_t(unsigned int, PAGE_CACHE_SIZE - base, + avail_here); + c = kmap(*ppages) + base; + + while (avail_here) { + avail_here -= avail_page; + if (copied || avail_page < desc->elem_size) { + unsigned int l = min(avail_page, + desc->elem_size - copied); + if (!elem) { + elem = kmalloc(desc->elem_size, + GFP_KERNEL); + err = -ENOMEM; + if (!elem) + goto out; + } + if (encode) { + if (!copied) { + err = desc->xcode(desc, elem); + if (err) + goto out; + } + memcpy(c, elem + copied, l); + copied += l; + if (copied == desc->elem_size) + copied = 0; + } else { + memcpy(elem + copied, c, l); + copied += l; + if (copied == desc->elem_size) { + err = desc->xcode(desc, elem); + if (err) + goto out; + copied = 0; + } + } + avail_page -= l; + c += l; + } + while (avail_page >= desc->elem_size) { + err = desc->xcode(desc, c); + if (err) + goto out; + c += desc->elem_size; + avail_page -= desc->elem_size; + } + if (avail_page) { + unsigned int l = min(avail_page, + desc->elem_size - copied); + if (!elem) { + elem = kmalloc(desc->elem_size, + GFP_KERNEL); + err = -ENOMEM; + if (!elem) + goto out; + } + if (encode) { + if (!copied) { + err = desc->xcode(desc, elem); + if (err) + goto out; + } + memcpy(c, elem + copied, l); + copied += l; + if (copied == desc->elem_size) + copied = 0; + } else { + memcpy(elem + copied, c, l); + copied += l; + if (copied == desc->elem_size) { + err = desc->xcode(desc, elem); + if (err) + goto out; + copied = 0; + } + } + } + if (avail_here) { + kunmap(*ppages); + ppages++; + c = kmap(*ppages); + } + + avail_page = min(avail_here, + (unsigned int) PAGE_CACHE_SIZE); + } + base = buf->page_len; /* align to start of tail */ + } + + /* process tail */ + base -= buf->page_len; + if (todo) { + c = buf->tail->iov_base + base; + if (copied) { + unsigned int l = desc->elem_size - copied; + + if (encode) + memcpy(c, elem + copied, l); + else { + memcpy(elem + copied, c, l); + err = desc->xcode(desc, elem); + if (err) + goto out; + } + todo -= l; + c += l; + } + while (todo) { + err = desc->xcode(desc, c); + if (err) + goto out; + c += desc->elem_size; + todo -= desc->elem_size; + } + } + err = 0; + +out: + if (elem) + kfree(elem); + if (ppages) + kunmap(*ppages); + return err; +} + +int +xdr_decode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc) +{ + if (base >= buf->len) + return -EINVAL; + + return xdr_xcode_array2(buf, base, desc, 0); +} + +int +xdr_encode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc) +{ + if ((unsigned long) base + 4 + desc->array_len * desc->elem_size > + buf->head->iov_len + buf->page_len + buf->tail->iov_len) + return -EINVAL; + + return xdr_xcode_array2(buf, base, desc, 1); +} Index: linux-2.6.12-rc3/net/sunrpc/xprt.c =================================================================== --- linux-2.6.12-rc3.orig/net/sunrpc/xprt.c +++ linux-2.6.12-rc3/net/sunrpc/xprt.c @@ -725,7 +725,8 @@ csum_partial_copy_to_xdr(struct xdr_buf goto no_checksum; desc.csum = csum_partial(skb->data, desc.offset, skb->csum); - xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits); + if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) + return -1; if (desc.offset != skb->len) { unsigned int csum2; csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); @@ -737,7 +738,8 @@ csum_partial_copy_to_xdr(struct xdr_buf return -1; return 0; no_checksum: - xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits); + if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) + return -1; if (desc.count) return -1; return 0; @@ -907,6 +909,7 @@ tcp_read_request(struct rpc_xprt *xprt, struct rpc_rqst *req; struct xdr_buf *rcvbuf; size_t len; + int r; /* Find and lock the request corresponding to this xid */ spin_lock(&xprt->sock_lock); @@ -927,16 +930,30 @@ tcp_read_request(struct rpc_xprt *xprt, len = xprt->tcp_reclen - xprt->tcp_offset; memcpy(&my_desc, desc, sizeof(my_desc)); my_desc.count = len; - xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, + r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, &my_desc, tcp_copy_data); desc->count -= len; desc->offset += len; } else - xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, + r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, desc, tcp_copy_data); xprt->tcp_copied += len; xprt->tcp_offset += len; + if (r < 0) { + /* Error when copying to the receive buffer, + * usually because we weren't able to allocate + * additional buffer pages. All we can do now + * is turn off XPRT_COPY_DATA, so the request + * will not receive any additional updates, + * and time out. + * Any remaining data from this record will + * be discarded. + */ + xprt->tcp_flags &= ~XPRT_COPY_DATA; + goto out; + } + if (xprt->tcp_copied == req->rq_private_buf.buflen) xprt->tcp_flags &= ~XPRT_COPY_DATA; else if (xprt->tcp_offset == xprt->tcp_reclen) { @@ -949,6 +966,7 @@ tcp_read_request(struct rpc_xprt *xprt, req->rq_task->tk_pid); xprt_complete_rqst(xprt, req, xprt->tcp_copied); } +out: spin_unlock(&xprt->sock_lock); tcp_check_recm(xprt); } @@ -1446,6 +1464,7 @@ xprt_setup(int proto, struct sockaddr_in if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) return ERR_PTR(-ENOMEM); memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ + atomic_set(&xprt->count, 1); xprt->max_reqs = entries; slot_table_size = entries * sizeof(xprt->slot[0]); xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); @@ -1676,3 +1695,14 @@ xprt_destroy(struct rpc_xprt *xprt) return 0; } + +/** + * rpc_put_xprt() - Drop reference to the RPC transport struct + * @xprt - pointer to RPC transport + */ +void rpc_put_xprt(struct rpc_xprt *xprt) +{ + if (xprt != NULL && atomic_dec_and_test(&xprt->count)) + xprt_destroy(xprt); +} +