diff -u --recursive --new-file linux-2.4.7/arch/i386/kernel/i386_ksyms.c linux-2.4.7-rpc_sched/arch/i386/kernel/i386_ksyms.c --- linux-2.4.7/arch/i386/kernel/i386_ksyms.c Mon Jul 2 23:49:24 2001 +++ linux-2.4.7-rpc_sched/arch/i386/kernel/i386_ksyms.c Sat Jul 21 16:58:55 2001 @@ -161,3 +161,7 @@ #ifdef CONFIG_X86_PAE EXPORT_SYMBOL(empty_zero_page); #endif + +#ifdef CONFIG_HAVE_DEC_LOCK +EXPORT_SYMBOL(atomic_dec_and_lock); +#endif diff -u --recursive --new-file linux-2.4.7/arch/sparc64/kernel/sparc64_ksyms.c linux-2.4.7-rpc_sched/arch/sparc64/kernel/sparc64_ksyms.c --- linux-2.4.7/arch/sparc64/kernel/sparc64_ksyms.c Tue Jun 12 04:15:27 2001 +++ linux-2.4.7-rpc_sched/arch/sparc64/kernel/sparc64_ksyms.c Sat Jul 21 16:58:55 2001 @@ -163,6 +163,7 @@ /* Atomic counter implementation. */ EXPORT_SYMBOL(__atomic_add); EXPORT_SYMBOL(__atomic_sub); +EXPORT_SYMBOL(atomic_dec_and_lock); /* Atomic bit operations. */ EXPORT_SYMBOL(___test_and_set_bit); diff -u --recursive --new-file linux-2.4.7/fs/lockd/clntlock.c linux-2.4.7-rpc_sched/fs/lockd/clntlock.c --- linux-2.4.7/fs/lockd/clntlock.c Tue Nov 7 19:18:57 2000 +++ linux-2.4.7-rpc_sched/fs/lockd/clntlock.c Sat Jul 21 17:01:31 2001 @@ -138,7 +138,7 @@ void nlmclnt_recovery(struct nlm_host *host, u32 newstate) { - if (!host->h_reclaiming++) { + if (host->h_reclaiming++) { if (host->h_nsmstate == newstate) return; printk(KERN_WARNING @@ -153,7 +153,7 @@ host->h_nsmstate = newstate; host->h_state++; nlm_get_host(host); - kernel_thread(reclaimer, host, 0); + kernel_thread(reclaimer, host, CLONE_SIGNAL); } } @@ -167,12 +167,24 @@ /* This one ensures that our parent doesn't terminate while the * reclaim is in progress */ lock_kernel(); + + daemonize(); + strcpy(current->comm, "lockd-reclaim"); + + /* Block signals */ + spin_lock_irq(&current->sigmask_lock); + siginitsetinv(&current->blocked, sigmask(SIGKILL)); + recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + lockd_up(); + exit_files(current); + /* First, reclaim all locks that 
have been granted previously. */ restart: tmp = file_lock_list.next; - while (tmp != &file_lock_list) { + while (tmp != &file_lock_list && ! signalled()) { struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); struct inode *inode = fl->fl_file->f_dentry->d_inode; if (inode->i_sb->s_magic == NFS_SUPER_MAGIC && diff -u --recursive --new-file linux-2.4.7/fs/lockd/clntproc.c linux-2.4.7-rpc_sched/fs/lockd/clntproc.c --- linux-2.4.7/fs/lockd/clntproc.c Mon Dec 4 03:01:01 2000 +++ linux-2.4.7-rpc_sched/fs/lockd/clntproc.c Sat Jul 21 17:01:11 2001 @@ -142,7 +142,8 @@ /* If we're cleaning up locks because the process is exiting, * perform the RPC call asynchronously. */ - if ((cmd == F_SETLK || cmd == F_SETLKW) + if ((cmd == F_SETLK || cmd == F_SETLKW + || cmd == F_SETLK64 || cmd == F_SETLKW64) && fl->fl_type == F_UNLCK && (current->flags & PF_EXITING)) { sigfillset(&current->blocked); /* Mask all signals */ @@ -166,13 +167,15 @@ /* Set up the argument struct */ nlmclnt_setlockargs(call, fl); - if (cmd == F_GETLK) { + if (cmd == F_GETLK || cmd == F_GETLK64) { status = nlmclnt_test(call, fl); - } else if ((cmd == F_SETLK || cmd == F_SETLKW) + } else if ((cmd == F_SETLK || cmd == F_SETLKW + || cmd == F_SETLK64 || cmd == F_SETLKW64) && fl->fl_type == F_UNLCK) { status = nlmclnt_unlock(call, fl); - } else if (cmd == F_SETLK || cmd == F_SETLKW) { - call->a_args.block = (cmd == F_SETLKW)? 1 : 0; + } else if (cmd == F_SETLK || cmd == F_SETLKW + || cmd == F_SETLK64 || cmd == F_SETLKW64) { + call->a_args.block = (cmd == F_SETLKW) || cmd == F_SETLKW64? 
1 : 0; status = nlmclnt_lock(call, fl); } else { status = -EINVAL; diff -u --recursive --new-file linux-2.4.7/fs/lockd/host.c linux-2.4.7-rpc_sched/fs/lockd/host.c --- linux-2.4.7/fs/lockd/host.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.7-rpc_sched/fs/lockd/host.c Sat Jul 21 17:01:31 2001 @@ -51,7 +51,8 @@ struct nlm_host * nlmsvc_lookup_host(struct svc_rqst *rqstp) { - return nlm_lookup_host(rqstp->rq_client, &rqstp->rq_addr, 0, 0); + return nlm_lookup_host(rqstp->rq_client, &rqstp->rq_addr, + rqstp->rq_prot, rqstp->rq_vers); } /* @@ -97,7 +98,9 @@ nlm_gc_hosts(); for (hp = &nlm_hosts[hash]; (host = *hp); hp = &host->h_next) { - if (host->h_version != version || host->h_proto != proto) + if (proto && host->h_proto != proto) + continue; + if (version && host->h_version != version) continue; if (nlm_match_host(host, clnt, sin)) { diff -u --recursive --new-file linux-2.4.7/fs/lockd/mon.c linux-2.4.7-rpc_sched/fs/lockd/mon.c --- linux-2.4.7/fs/lockd/mon.c Tue Jun 12 04:15:27 2001 +++ linux-2.4.7-rpc_sched/fs/lockd/mon.c Sat Jul 21 17:01:31 2001 @@ -43,7 +43,7 @@ args.addr = host->h_addr.sin_addr.s_addr; args.prog = NLM_PROGRAM; - args.vers = 1; + args.vers = host->h_version; args.proc = NLMPROC_NSM_NOTIFY; memset(res, 0, sizeof(*res)); diff -u --recursive --new-file linux-2.4.7/fs/lockd/svc.c linux-2.4.7-rpc_sched/fs/lockd/svc.c --- linux-2.4.7/fs/lockd/svc.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.7-rpc_sched/fs/lockd/svc.c Sat Jul 21 17:01:32 2001 @@ -65,6 +65,7 @@ struct svc_serv *serv = rqstp->rq_server; int err = 0; unsigned long grace_period_expire; + struct k_sigaction sa; /* Lock module and set up kernel thread */ MOD_INC_USE_COUNT; @@ -76,16 +77,20 @@ nlmsvc_pid = current->pid; up(&lockd_start); - exit_mm(current); - current->session = 1; - current->pgrp = 1; + daemonize(); sprintf(current->comm, "lockd"); /* Process request with signals blocked. 
*/ spin_lock_irq(&current->sigmask_lock); - siginitsetinv(&current->blocked, sigmask(SIGKILL)); + siginitsetinv(&current->blocked, sigmask(SIGKILL)|sigmask(SIGCHLD)); recalc_sigpending(current); - spin_unlock_irq(&current->sigmask_lock); + spin_unlock_irq(&current->sigmask_lock); + + /* Install a handler so SIGCLD is ignored */ + sa.sa.sa_handler = SIG_IGN; + sa.sa.sa_flags = 0; + siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD)); + do_sigaction(SIGCHLD, &sa, NULL); /* kick rpciod */ rpciod_up(); @@ -105,10 +110,10 @@ nlmsvc_grace_period = 10 * HZ; #else if (nlm_grace_period) { - nlmsvc_grace_period += (1 + nlm_grace_period / nlm_timeout) + nlmsvc_grace_period = (1 + nlm_grace_period / nlm_timeout) * nlm_timeout * HZ; } else { - nlmsvc_grace_period += 5 * nlm_timeout * HZ; + nlmsvc_grace_period = 5 * nlm_timeout * HZ; } #endif @@ -135,10 +140,12 @@ * (Theoretically, there shouldn't even be blocked locks * during grace period). */ - if (!nlmsvc_grace_period) { + if (!grace_period_expire) { timeout = nlmsvc_retry_blocked(); - } else if (time_before(nlmsvc_grace_period, jiffies)) + } else if (time_before(grace_period_expire, jiffies)) { + grace_period_expire = 0; nlmsvc_grace_period = 0; + } /* * Find a socket with data available and call its @@ -339,7 +346,7 @@ * Define NLM program and procedures */ static struct svc_version nlmsvc_version1 = { - 1, 16, nlmsvc_procedures, NULL + 1, 17, nlmsvc_procedures, NULL }; static struct svc_version nlmsvc_version3 = { 3, 24, nlmsvc_procedures, NULL diff -u --recursive --new-file linux-2.4.7/fs/lockd/svc4proc.c linux-2.4.7-rpc_sched/fs/lockd/svc4proc.c --- linux-2.4.7/fs/lockd/svc4proc.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.7-rpc_sched/fs/lockd/svc4proc.c Sat Jul 21 17:01:32 2001 @@ -420,6 +420,8 @@ void *resp) { struct sockaddr_in saddr = rqstp->rq_addr; + int vers = rqstp->rq_vers; + int prot = rqstp->rq_prot; struct nlm_host *host; dprintk("lockd: SM_NOTIFY called\n"); @@ -435,8 +437,8 @@ /* Obtain the host pointer for this NFS server and try to * reclaim all 
locks we hold on this server. */ - saddr.sin_addr.s_addr = argp->addr; - if ((host = nlm_lookup_host(NULL, &saddr, IPPROTO_UDP, 1)) != NULL) { + saddr.sin_addr.s_addr = htonl(argp->addr); + if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) { nlmclnt_recovery(host, argp->state); nlm_release_host(host); } @@ -444,7 +446,7 @@ /* If we run on an NFS server, delete all locks held by the client */ if (nlmsvc_ops != NULL) { struct svc_client *clnt; - saddr.sin_addr.s_addr = argp->addr; + saddr.sin_addr.s_addr = argp->addr; if ((clnt = nlmsvc_ops->exp_getclient(&saddr)) != NULL && (host = nlm_lookup_host(clnt, &saddr, 0, 0)) != NULL) { nlmsvc_free_host_resources(host); @@ -549,7 +551,8 @@ PROC(cancel_res, cancelres, norep, res, void), PROC(unlock_res, unlockres, norep, res, void), PROC(granted_res, grantedres, norep, res, void), - PROC(none, void, void, void, void), + /* statd callback */ + PROC(sm_notify, reboot, void, reboot, void), PROC(none, void, void, void, void), PROC(none, void, void, void, void), PROC(none, void, void, void, void), @@ -558,6 +561,4 @@ PROC(nm_lock, lockargs, res, args, res), PROC(free_all, notify, void, args, void), - /* statd callback */ - PROC(sm_notify, reboot, void, reboot, void), }; diff -u --recursive --new-file linux-2.4.7/fs/lockd/svcproc.c linux-2.4.7-rpc_sched/fs/lockd/svcproc.c --- linux-2.4.7/fs/lockd/svcproc.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.7-rpc_sched/fs/lockd/svcproc.c Sat Jul 21 17:01:32 2001 @@ -445,6 +445,8 @@ void *resp) { struct sockaddr_in saddr = rqstp->rq_addr; + int vers = rqstp->rq_vers; + int prot = rqstp->rq_prot; struct nlm_host *host; dprintk("lockd: SM_NOTIFY called\n"); @@ -460,8 +462,8 @@ /* Obtain the host pointer for this NFS server and try to * reclaim all locks we hold on this server. 
*/ - saddr.sin_addr.s_addr = argp->addr; - if ((host = nlm_lookup_host(NULL, &saddr, IPPROTO_UDP, 1)) != NULL) { + saddr.sin_addr.s_addr = htonl(argp->addr); + if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) { nlmclnt_recovery(host, argp->state); nlm_release_host(host); } @@ -574,7 +576,8 @@ PROC(cancel_res, cancelres, norep, res, void), PROC(unlock_res, unlockres, norep, res, void), PROC(granted_res, grantedres, norep, res, void), - PROC(none, void, void, void, void), + /* statd callback */ + PROC(sm_notify, reboot, void, reboot, void), PROC(none, void, void, void, void), PROC(none, void, void, void, void), PROC(none, void, void, void, void), @@ -583,6 +586,4 @@ PROC(nm_lock, lockargs, res, args, res), PROC(free_all, notify, void, args, void), - /* statd callback */ - PROC(sm_notify, reboot, void, reboot, void), }; diff -u --recursive --new-file linux-2.4.7/fs/locks.c linux-2.4.7-rpc_sched/fs/locks.c --- linux-2.4.7/fs/locks.c Sat Jul 21 16:50:28 2001 +++ linux-2.4.7-rpc_sched/fs/locks.c Sat Jul 21 17:01:11 2001 @@ -257,7 +257,7 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, struct flock *l) { - loff_t start; + off_t start, end; switch (l->l_whence) { case 0: /*SEEK_SET*/ @@ -270,17 +270,16 @@ start = filp->f_dentry->d_inode->i_size; break; default: - return (0); + return -EINVAL; } if (((start += l->l_start) < 0) || (l->l_len < 0)) - return (0); - fl->fl_end = start + l->l_len - 1; - if (l->l_len > 0 && fl->fl_end < 0) - return (0); - if (fl->fl_end > OFFT_OFFSET_MAX) - return 0; + return -EINVAL; + end = start + l->l_len - 1; + if (l->l_len > 0 && end < 0) + return -EOVERFLOW; fl->fl_start = start; /* we record the absolute position */ + fl->fl_end = end; if (l->l_len == 0) fl->fl_end = OFFSET_MAX; @@ -292,7 +291,7 @@ fl->fl_insert = NULL; fl->fl_remove = NULL; - return (assign_type(fl, l->l_type) == 0); + return assign_type(fl, l->l_type); } #if BITS_PER_LONG == 32 @@ -312,14 +311,14 @@ start = 
filp->f_dentry->d_inode->i_size; break; default: - return (0); + return -EINVAL; } if (((start += l->l_start) < 0) || (l->l_len < 0)) - return (0); + return -EINVAL; fl->fl_end = start + l->l_len - 1; if (l->l_len > 0 && fl->fl_end < 0) - return (0); + return -EOVERFLOW; fl->fl_start = start; /* we record the absolute position */ if (l->l_len == 0) fl->fl_end = OFFSET_MAX; @@ -339,10 +338,10 @@ fl->fl_type = l->l_type; break; default: - return (0); + return -EINVAL; } - return (1); + return (0); } #endif @@ -1353,8 +1352,8 @@ if (!filp) goto out; - error = -EINVAL; - if (!flock_to_posix_lock(filp, &file_lock, &flock)) + error = flock_to_posix_lock(filp, &file_lock, &flock); + if (error) goto out_putf; if (filp->f_op && filp->f_op->lock) { @@ -1443,8 +1442,8 @@ } } - error = -EINVAL; - if (!flock_to_posix_lock(filp, file_lock, &flock)) + error = flock_to_posix_lock(filp, file_lock, &flock); + if (error) goto out_putf; error = -EBADF; @@ -1518,8 +1517,8 @@ if (!filp) goto out; - error = -EINVAL; - if (!flock64_to_posix_lock(filp, &file_lock, &flock)) + error = flock64_to_posix_lock(filp, &file_lock, &flock); + if (error) goto out_putf; if (filp->f_op && filp->f_op->lock) { @@ -1596,8 +1595,8 @@ } } - error = -EINVAL; - if (!flock64_to_posix_lock(filp, file_lock, &flock)) + error = flock64_to_posix_lock(filp, file_lock, &flock); + if (error) goto out_putf; error = -EBADF; diff -u --recursive --new-file linux-2.4.7/fs/namei.c linux-2.4.7-rpc_sched/fs/namei.c --- linux-2.4.7/fs/namei.c Sat Jul 21 16:50:28 2001 +++ linux-2.4.7-rpc_sched/fs/namei.c Sat Jul 21 17:00:12 2001 @@ -418,7 +418,7 @@ while (*name=='/') name++; if (!*name) - goto return_base; + goto return_reval; inode = nd->dentry->d_inode; if (current->link_count) @@ -537,7 +537,7 @@ inode = nd->dentry->d_inode; /* fallthrough */ case 1: - goto return_base; + goto return_reval; } if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { err = nd->dentry->d_op->d_hash(nd->dentry, &this); @@ -588,6 +588,10 @@ 
nd->last_type = LAST_DOT; else if (this.len == 2 && this.name[1] == '.') nd->last_type = LAST_DOTDOT; +return_reval: + dentry = nd->dentry; + if (dentry && dentry->d_op && dentry->d_op->d_revalidate) + dentry->d_op->d_revalidate(dentry, nd->flags); return_base: return 0; out_dput: diff -u --recursive --new-file linux-2.4.7/fs/nfs/dir.c linux-2.4.7-rpc_sched/fs/nfs/dir.c --- linux-2.4.7/fs/nfs/dir.c Tue Jun 12 20:15:08 2001 +++ linux-2.4.7-rpc_sched/fs/nfs/dir.c Sat Jul 21 17:00:27 2001 @@ -34,6 +34,7 @@ #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ +static loff_t nfs_dir_llseek(struct file *, loff_t, int); static int nfs_readdir(struct file *, void *, filldir_t); static struct dentry *nfs_lookup(struct inode *, struct dentry *); static int nfs_create(struct inode *, struct dentry *, int); @@ -47,6 +48,7 @@ struct inode *, struct dentry *); struct file_operations nfs_dir_operations = { + llseek: nfs_dir_llseek, read: generic_read_dir, readdir: nfs_readdir, open: nfs_open, @@ -68,6 +70,25 @@ setattr: nfs_notify_change, }; +static loff_t nfs_dir_llseek(struct file *file, loff_t offset, int origin) +{ + switch (origin) { + case 1: + if (offset == 0) { + offset = file->f_pos; + break; + } + case 2: + return -EINVAL; + } + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_reada = 0; + file->f_version = ++event; + } + return (offset <= 0) ? 
0 : offset; +} + typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int); typedef struct { struct file *file; @@ -108,13 +129,17 @@ error = NFS_PROTO(inode)->readdir(inode, cred, desc->entry->cookie, buffer, NFS_SERVER(inode)->dtsize, desc->plus); /* We requested READDIRPLUS, but the server doesn't grok it */ - if (desc->plus && error == -ENOTSUPP) { - NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; - desc->plus = 0; - goto again; - } - if (error < 0) + if (error < 0) { + if (error == -ENOTSUPP && desc->plus) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; + NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + desc->plus = 0; + goto again; + } goto error; + } + if (desc->plus) + NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; SetPageUptodate(page); kunmap(page); /* Ensure consistent page alignment of the data. @@ -195,7 +220,6 @@ dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index); - desc->plus = NFS_USE_READDIRPLUS(inode); page = read_cache_page(&inode->i_data, desc->page_index, (filler_t *)nfs_readdir_filler, desc); if (IS_ERR(page)) { @@ -247,6 +271,29 @@ return res; } +static struct { + unsigned int nfstype; + unsigned int dtype; +} nfs_type2dtype[] = { + { NFNON, DT_UNKNOWN }, + { NFREG, DT_REG }, + { NFDIR, DT_DIR }, + { NFBLK, DT_BLK }, + { NFCHR, DT_CHR }, + { NFLNK, DT_LNK }, + { NFSOCK, DT_SOCK }, + { NFBAD, DT_UNKNOWN }, + { NFFIFO, DT_FIFO }, +}; + +static inline +unsigned nfs_type_to_d_type(unsigned type) +{ + if (type < sizeof(nfs_type2dtype) / sizeof(nfs_type2dtype[0])) /* bound by table size so the last entry (NFFIFO) is reachable */ + return nfs_type2dtype[type].dtype; + return DT_UNKNOWN; +} + /* * Once we've found the start of the dirent within a page: fill 'er up... 
*/ @@ -263,11 +310,17 @@ dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target); for(;;) { + unsigned d_type = DT_UNKNOWN; /* Note: entry->prev_cookie contains the cookie for * retrieving the current dirent on the server */ fileid = nfs_fileid_to_ino_t(entry->ino); + + /* Use readdirplus info */ + if (desc->plus && (entry->fattr.valid & NFS_ATTR_FATTR)) + d_type = nfs_type_to_d_type(entry->fattr.type); + res = filldir(dirent, entry->name, entry->len, - entry->prev_cookie, fileid, DT_UNKNOWN); + entry->prev_cookie, fileid, d_type); if (res < 0) break; file->f_pos = desc->target = entry->cookie; @@ -372,6 +425,7 @@ desc->target = filp->f_pos; desc->entry = &my_entry; desc->decode = NFS_PROTO(inode)->decode_dirent; + desc->plus = NFS_USE_READDIRPLUS(inode); while(!desc->entry->eof) { res = readdir_search_pagecache(desc); @@ -401,55 +455,74 @@ return 0; } +static inline +void nfs_renew_verifier(struct inode *dir, struct dentry *dentry) +{ + dentry->d_verifier = NFS_CACHE_MTIME(dir); +} + +/* + * A check for whether or not the parent directory has changed. + * In the case it has, we assume that the dentries are untrustworthy + * and may need to be looked up again. + */ +static inline +int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +{ + if (IS_ROOT(dentry)) + return 1; + nfs_revalidate_inode(NFS_SERVER(dir),dir); + return dentry->d_verifier == NFS_CACHE_MTIME(dir); +} + /* * Whenever an NFS operation succeeds, we know that the dentry * is valid, so we update the revalidation timestamp. 
*/ -static inline void nfs_renew_times(struct dentry * dentry) +static inline void __nfs_renew_times(struct dentry * dentry) { dentry->d_time = jiffies; } +static inline void nfs_renew_times(struct dentry * dentry) +{ + __nfs_renew_times(dentry); + nfs_renew_verifier(dentry->d_parent->d_inode, dentry); +} + static inline int nfs_dentry_force_reval(struct dentry *dentry, int flags) { struct inode *inode = dentry->d_inode; unsigned long timeout = NFS_ATTRTIMEO(inode); /* - * If it's the last lookup in a series, we use a stricter - * cache consistency check by looking at the parent mtime. - * - * If it's been modified in the last hour, be really strict. - * (This still means that we can avoid doing unnecessary - * work on directories like /usr/share/bin etc which basically - * never change). + * If we're interested in close-to-open cache consistency, + * then we revalidate the inode upon lookup. */ - if (!(flags & LOOKUP_CONTINUE)) { - long diff = CURRENT_TIME - dentry->d_parent->d_inode->i_mtime; + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO) + && !(flags & LOOKUP_CONTINUE)) + return 1; - if (diff < 15*60) - timeout = 0; - } - - return time_after(jiffies,dentry->d_time + timeout); + if (time_after(jiffies, NFS_READTIME(inode) + timeout)) + return 1; + + return time_after(jiffies, dentry->d_time + timeout); } /* * We judge how long we want to trust negative * dentries by looking at the parent inode mtime. * - * If mtime is close to present time, we revalidate - * more often. + * If parent mtime has changed, we revalidate, else we wait for a + * period corresponding to the parent's attribute cache timeout value. 
*/ -#define NFS_REVALIDATE_NEGATIVE (1 * HZ) static inline int nfs_neg_need_reval(struct dentry *dentry) { struct inode *dir = dentry->d_parent->d_inode; unsigned long timeout = NFS_ATTRTIMEO(dir); - long diff = CURRENT_TIME - dir->i_mtime; - if (diff < 5*60 && timeout > NFS_REVALIDATE_NEGATIVE) - timeout = NFS_REVALIDATE_NEGATIVE; + if (!nfs_check_verifier(dir, dentry)) + return 1; return time_after(jiffies, dentry->d_time + timeout); } @@ -462,9 +535,8 @@ * NOTE! The hit can be a negative hit too, don't assume * we have an inode! * - * If the dentry is older than the revalidation interval, - * we do a new lookup and verify that the dentry is still - * correct. + * If the parent directory is seen to have changed, we throw out the + * cached dentry and do a new lookup. */ static int nfs_lookup_revalidate(struct dentry * dentry, int flags) { @@ -477,11 +549,7 @@ lock_kernel(); dir = dentry->d_parent->d_inode; inode = dentry->d_inode; - /* - * If we don't have an inode, let's look at the parent - * directory mtime to get a hint about how often we - * should validate things.. - */ + if (!inode) { if (nfs_neg_need_reval(dentry)) goto out_bad; @@ -494,48 +562,50 @@ goto out_bad; } - if (!nfs_dentry_force_reval(dentry, flags)) - goto out_valid; - - if (IS_ROOT(dentry)) { - __nfs_revalidate_inode(NFS_SERVER(inode), inode); - goto out_valid_renew; - } + /* Force a full look up iff the parent directory has changed */ + if (nfs_check_verifier(dir, dentry)) + goto fast_getattr; - /* - * Do a new lookup and check the dentry attributes. - */ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); if (error) goto out_bad; - - /* Inode number matches? */ - if (!(fattr.valid & NFS_ATTR_FATTR) || - NFS_FSID(inode) != fattr.fsid || - NFS_FILEID(inode) != fattr.fileid) + if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) + goto out_bad; + if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; - /* Ok, remember that we successfully checked it.. 
*/ - nfs_refresh_inode(inode, &fattr); + nfs_renew_times(dentry); + goto out_valid; + + fast_getattr: + if (!nfs_dentry_force_reval(dentry, flags)) + goto out_valid; - if (nfs_inode_is_stale(inode, &fhandle, &fattr)) + /* + * Revalidate the cached attributes. + */ + error = NFS_PROTO(inode)->getattr(inode, &fattr); + if (error) + goto out_bad; + if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; - out_valid_renew: - nfs_renew_times(dentry); -out_valid: + __nfs_renew_times(dentry); + out_valid: unlock_kernel(); return 1; -out_bad: - shrink_dcache_parent(dentry); - /* If we have submounts, don't unhash ! */ - if (have_submounts(dentry)) - goto out_valid; - d_drop(dentry); - /* Purge readdir caches. */ - nfs_zap_caches(dir); - if (inode && S_ISDIR(inode->i_mode)) + out_bad: + if (inode && S_ISDIR(inode->i_mode)) { + /* Purge readdir caches. */ nfs_zap_caches(inode); + /* If we are root, or have submounts, don't unhash ! */ + if (IS_ROOT(dentry) || have_submounts(dentry)) { + nfs_renew_verifier(dir, dentry); + goto out_valid; + } + shrink_dcache_parent(dentry); + } + d_drop(dentry); unlock_kernel(); return 0; } @@ -604,9 +674,9 @@ if (inode) { no_entry: d_add(dentry, inode); - nfs_renew_times(dentry); error = 0; } + nfs_renew_times(dentry); } out: return ERR_PTR(error); diff -u --recursive --new-file linux-2.4.7/fs/nfs/file.c linux-2.4.7-rpc_sched/fs/nfs/file.c --- linux-2.4.7/fs/nfs/file.c Tue May 22 18:26:06 2001 +++ linux-2.4.7-rpc_sched/fs/nfs/file.c Sat Jul 21 17:01:12 2001 @@ -39,7 +39,6 @@ static ssize_t nfs_file_write(struct file *, const char *, size_t, loff_t *); static int nfs_file_flush(struct file *); static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); -static int nfs_file_release(struct inode *, struct file *); struct file_operations nfs_file_operations = { read: nfs_file_read, @@ -47,7 +46,7 @@ mmap: nfs_file_mmap, open: nfs_open, flush: nfs_file_flush, - release: nfs_file_release, + release: nfs_release, fsync: 
nfs_fsync, lock: nfs_lock, }; @@ -88,13 +87,6 @@ return status; } -static int -nfs_file_release(struct inode *inode, struct file *file) -{ - filemap_fdatasync(inode->i_mapping); - return nfs_release(inode,file); -} - static ssize_t nfs_file_read(struct file * file, char * buf, size_t count, loff_t *ppos) { @@ -162,9 +154,18 @@ */ static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) { + int status; + kmap(page); - return nfs_flush_incompatible(file, page); + status = nfs_flush_incompatible(file, page); + if (status) + goto out_err; + return 0; + out_err: + kunmap(page); + return status; } + static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) { long status; @@ -272,7 +273,7 @@ /* Fake OK code if mounted without NLM support */ if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { - if (cmd == F_GETLK) + if (cmd == F_GETLK || cmd == F_GETLK64) status = LOCK_USE_CLNT; goto out_ok; } @@ -312,13 +313,20 @@ * This makes locking act as a cache coherency point. 
*/ out_ok: - if ((cmd == F_SETLK || cmd == F_SETLKW) && fl->fl_type != F_UNLCK) { - filemap_fdatasync(inode->i_mapping); - down(&inode->i_sem); - nfs_wb_all(inode); /* we may have slept */ - up(&inode->i_sem); - filemap_fdatawait(inode->i_mapping); - nfs_zap_caches(inode); + switch (cmd) { + case F_SETLK: + case F_SETLKW: + case F_SETLK64: + case F_SETLKW64: + if (fl->fl_type != F_UNLCK) { + filemap_fdatasync(inode->i_mapping); + down(&inode->i_sem); + nfs_wb_all(inode); /* we may have slept */ + up(&inode->i_sem); + filemap_fdatawait(inode->i_mapping); + nfs_zap_caches(inode); + } + default: } return status; } diff -u --recursive --new-file linux-2.4.7/fs/nfs/inode.c linux-2.4.7-rpc_sched/fs/nfs/inode.c --- linux-2.4.7/fs/nfs/inode.c Wed Jun 27 23:02:29 2001 +++ linux-2.4.7-rpc_sched/fs/nfs/inode.c Sat Jul 21 17:00:27 2001 @@ -48,6 +48,7 @@ static void nfs_write_inode(struct inode *,int); static void nfs_delete_inode(struct inode *); static void nfs_put_super(struct super_block *); +static void nfs_clear_inode(struct inode *); static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct statfs *); @@ -57,6 +58,7 @@ delete_inode: nfs_delete_inode, put_super: nfs_put_super, statfs: nfs_statfs, + clear_inode: nfs_clear_inode, umount_begin: nfs_umount_begin, }; @@ -141,6 +143,19 @@ clear_inode(inode); } +/* + * For the moment, the only task for the NFS clear_inode method is to + * release the mmap credential + */ +static void +nfs_clear_inode(struct inode *inode) +{ + struct rpc_cred *cred = NFS_I(inode)->mm_cred; + + if (cred) + put_rpccred(cred); +} + void nfs_put_super(struct super_block *sb) { @@ -308,6 +323,7 @@ if (!server->hostname) goto out_unlock; strcpy(server->hostname, data->hostname); + server->caps = 0; nfsv3_try_again: /* Check NFS protocol revision and initialize RPC op vector @@ -316,6 +332,7 @@ #ifdef CONFIG_NFS_V3 server->rpc_ops = &nfs_v3_clientops; version = 3; + server->caps |= NFS_CAP_READDIRPLUS; if 
(data->version < 4) { printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); goto out_unlock; @@ -600,7 +617,6 @@ inode->i_ctime = nfs_time_to_secs(fattr->ctime); NFS_CACHE_CTIME(inode) = fattr->ctime; NFS_CACHE_MTIME(inode) = fattr->mtime; - NFS_CACHE_ATIME(inode) = fattr->atime; NFS_CACHE_ISIZE(inode) = fattr->size; NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; @@ -636,27 +652,6 @@ return 1; } -int -nfs_inode_is_stale(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr) -{ - /* Empty inodes are not stale */ - if (!inode->i_mode) - return 0; - - if ((fattr->mode & S_IFMT) != (inode->i_mode & S_IFMT)) - return 1; - - if (is_bad_inode(inode)) - return 1; - - /* Has the filehandle changed? If so is the old one stale? */ - if (memcmp(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)) != 0 && - __nfs_revalidate_inode(NFS_SERVER(inode),inode) == -ESTALE) - return 1; - - return 0; -} - /* * This is our own version of iget that looks up inodes by file handle * instead of inode number. We use this technique instead of using @@ -722,7 +717,7 @@ /* * Make sure the inode is up-to-date. */ - error = nfs_revalidate(dentry); + error = nfs_revalidate_inode(NFS_SERVER(inode),inode); if (error) { #ifdef NFS_PARANOIA printk("nfs_notify_change: revalidate failed, error=%d\n", error); @@ -794,6 +789,21 @@ } /* + * Ensure that mmap has a recent RPC credential for use when writing out + * shared pages + */ +static inline void +nfs_set_mmcred(struct inode *inode, struct rpc_cred *cred) +{ + struct rpc_cred **p = &NFS_I(inode)->mm_cred, + *oldcred = *p; + + *p = get_rpccred(cred); + if (oldcred) + put_rpccred(oldcred); +} + +/* * These are probably going to contain hooks for * allocating and releasing RPC credentials for * the file. 
I'll have to think about Tronds patch @@ -808,20 +818,20 @@ auth = NFS_CLIENT(inode)->cl_auth; cred = rpcauth_lookupcred(auth, 0); filp->private_data = cred; + if (filp->f_mode & FMODE_WRITE) + nfs_set_mmcred(inode, cred); unlock_kernel(); return 0; } int nfs_release(struct inode *inode, struct file *filp) { - struct rpc_auth *auth; struct rpc_cred *cred; lock_kernel(); - auth = NFS_CLIENT(inode)->cl_auth; cred = nfs_file_cred(filp); if (cred) - rpcauth_releasecred(auth, cred); + put_rpccred(cred); unlock_kernel(); return 0; } @@ -833,24 +843,22 @@ int __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { - int status = 0; + int status = -ESTALE; struct nfs_fattr fattr; dfprintk(PAGECACHE, "NFS: revalidating (%x/%Ld)\n", inode->i_dev, (long long)NFS_FILEID(inode)); lock_kernel(); - if (!inode || is_bad_inode(inode) || NFS_STALE(inode)) { - unlock_kernel(); - return -ESTALE; - } + if (!inode || is_bad_inode(inode)) + goto out_nowait; + if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode) + goto out_nowait; while (NFS_REVALIDATING(inode)) { status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); - if (status < 0) { - unlock_kernel(); - return status; - } + if (status < 0) + goto out_nowait; if (time_before(jiffies,NFS_READTIME(inode)+NFS_ATTRTIMEO(inode))) { status = NFS_STALE(inode) ? 
-ESTALE : 0; goto out_nowait; @@ -864,7 +872,8 @@ inode->i_dev, (long long)NFS_FILEID(inode), status); if (status == -ESTALE) { NFS_FLAGS(inode) |= NFS_INO_STALE; - remove_inode_hash(inode); + if (inode != inode->i_sb->s_root->d_inode) + remove_inode_hash(inode); } goto out; } @@ -877,6 +886,8 @@ } dfprintk(PAGECACHE, "NFS: (%x/%Ld) revalidation complete\n", inode->i_dev, (long long)NFS_FILEID(inode)); + + NFS_FLAGS(inode) &= ~NFS_INO_STALE; out: NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; wake_up(&inode->i_wait); @@ -976,7 +987,6 @@ NFS_CACHE_CTIME(inode) = fattr->ctime; inode->i_ctime = nfs_time_to_secs(fattr->ctime); - NFS_CACHE_ATIME(inode) = fattr->atime; inode->i_atime = nfs_time_to_secs(fattr->atime); NFS_CACHE_MTIME(inode) = new_mtime; diff -u --recursive --new-file linux-2.4.7/fs/nfs/nfs2xdr.c linux-2.4.7-rpc_sched/fs/nfs/nfs2xdr.c --- linux-2.4.7/fs/nfs/nfs2xdr.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.7-rpc_sched/fs/nfs/nfs2xdr.c Sat Jul 21 16:59:36 2001 @@ -419,7 +419,7 @@ bufsiz = bufsiz >> 2; p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->cookie); + *p++ = htonl(args->cookie & 0xFFFFFFFF); *p++ = htonl(bufsiz); /* see above */ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); @@ -504,7 +504,7 @@ entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); entry->prev_cookie = entry->cookie; - entry->cookie = ntohl(*p++); + entry->cookie = (s64)((off_t)ntohl(*p++)); entry->eof = !p[0] && p[1]; return p; diff -u --recursive --new-file linux-2.4.7/fs/nfs/nfs3proc.c linux-2.4.7-rpc_sched/fs/nfs/nfs3proc.c --- linux-2.4.7/fs/nfs/nfs3proc.c Mon Dec 4 03:01:01 2000 +++ linux-2.4.7-rpc_sched/fs/nfs/nfs3proc.c Sat Jul 21 17:00:12 2001 @@ -80,7 +80,8 @@ status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR, fhandle, fattr, 0); dprintk("NFS reply lookup: %d\n", status); - nfs_refresh_inode(dir, &dir_attr); + if (status >= 0) + status = nfs_refresh_inode(dir, &dir_attr); return status; } diff -u --recursive --new-file linux-2.4.7/fs/nfs/nfs3xdr.c 
linux-2.4.7-rpc_sched/fs/nfs/nfs3xdr.c --- linux-2.4.7/fs/nfs/nfs3xdr.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.7-rpc_sched/fs/nfs/nfs3xdr.c Sat Jul 21 17:00:27 2001 @@ -523,6 +523,13 @@ return 0; } +/* Hack to sign-extending 32-bit cookies */ +static inline +u64 nfs_transform_cookie64(u64 cookie) +{ + return (cookie & 0x80000000) ? (cookie ^ 0xFFFFFFFF00000000) : cookie; +} + /* * Encode arguments to readdir call */ @@ -533,7 +540,7 @@ int buflen, replen; p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_hyper(p, args->cookie); + p = xdr_encode_hyper(p, nfs_transform_cookie64(args->cookie)); *p++ = args->verf[0]; *p++ = args->verf[1]; if (args->plus) { @@ -635,6 +642,7 @@ nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus) { struct nfs_entry old = *entry; + u64 cookie; if (!*p++) { if (!*p) @@ -648,9 +656,11 @@ entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); entry->prev_cookie = entry->cookie; - p = xdr_decode_hyper(p, &entry->cookie); + p = xdr_decode_hyper(p, &cookie); + entry->cookie = nfs_transform_cookie64(cookie); if (plus) { + entry->fattr.valid = 0; p = xdr_decode_post_op_attr(p, &entry->fattr); /* In fact, a post_op_fh3: */ if (*p++) { @@ -661,11 +671,8 @@ *entry = old; return ERR_PTR(-EAGAIN); } - } else { - /* If we don't get a file handle, the attrs - * aren't worth a lot. 
*/ - entry->fattr.valid = 0; - } + } else + memset((u8*)(&entry->fh), 0, sizeof(entry->fh)); } entry->eof = !p[0] && p[1]; diff -u --recursive --new-file linux-2.4.7/fs/nfs/read.c linux-2.4.7-rpc_sched/fs/nfs/read.c --- linux-2.4.7/fs/nfs/read.c Wed Jun 27 23:02:29 2001 +++ linux-2.4.7-rpc_sched/fs/nfs/read.c Sat Jul 21 16:58:16 2001 @@ -59,7 +59,7 @@ static __inline__ struct nfs_read_data *nfs_readdata_alloc(void) { struct nfs_read_data *p; - p = kmem_cache_alloc(nfs_rdata_cachep, SLAB_NFS); + p = kmem_cache_alloc(nfs_rdata_cachep, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); diff -u --recursive --new-file linux-2.4.7/fs/nfs/unlink.c linux-2.4.7-rpc_sched/fs/nfs/unlink.c --- linux-2.4.7/fs/nfs/unlink.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.7-rpc_sched/fs/nfs/unlink.c Sat Jul 21 16:58:55 2001 @@ -128,7 +128,7 @@ dir_i = dir->d_inode; nfs_zap_caches(dir_i); NFS_PROTO(dir_i)->unlink_done(dir, &task->tk_msg); - rpcauth_releasecred(task->tk_auth, data->cred); + put_rpccred(data->cred); data->cred = NULL; dput(dir); } diff -u --recursive --new-file linux-2.4.7/fs/nfs/write.c linux-2.4.7-rpc_sched/fs/nfs/write.c --- linux-2.4.7/fs/nfs/write.c Wed Jun 27 23:02:29 2001 +++ linux-2.4.7-rpc_sched/fs/nfs/write.c Sat Jul 21 16:58:55 2001 @@ -109,7 +109,7 @@ static __inline__ struct nfs_page *nfs_page_alloc(void) { struct nfs_page *p; - p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL); + p = kmem_cache_alloc(nfs_page_cachep, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->wb_hash); @@ -127,7 +127,7 @@ static __inline__ struct nfs_write_data *nfs_writedata_alloc(void) { struct nfs_write_data *p; - p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NFS); + p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); @@ -181,7 +181,9 @@ if (file) - cred = nfs_file_cred(file); + cred = get_rpccred(nfs_file_cred(file)); + if (!cred) + cred = get_rpccred(NFS_I(inode)->mm_cred); dprintk("NFS: 
nfs_writepage_sync(%x/%Ld %d@%Ld)\n", inode->i_dev, (long long)NFS_FILEID(inode), @@ -226,6 +228,8 @@ io_error: kunmap(page); + if (cred) + put_rpccred(cred); return written? written : result; } @@ -241,6 +245,9 @@ status = (IS_ERR(req)) ? PTR_ERR(req) : 0; if (status < 0) goto out; + if (!req->wb_cred) + req->wb_cred = get_rpccred(NFS_I(inode)->mm_cred); + nfs_unlock_request(req); nfs_release_request(req); nfs_strategy(inode); out: @@ -557,13 +564,11 @@ req->wb_bytes = count; req->wb_file = file; - /* If we have a struct file, use its cached credentials - * else cache the current process' credentials. */ + /* If we have a struct file, use its cached credentials */ if (file) { get_file(file); req->wb_cred = nfs_file_cred(file); - } else - req->wb_cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); + } req->wb_inode = inode; req->wb_count = 1; @@ -608,8 +613,8 @@ /* Release struct file or cached credential */ if (req->wb_file) fput(req->wb_file); - else - rpcauth_releasecred(NFS_CLIENT(inode)->cl_auth, req->wb_cred); + else if (req->wb_cred) + put_rpccred(req->wb_cred); page_cache_release(page); nfs_page_free(req); /* wake up anyone waiting to allocate a request */ @@ -927,8 +932,6 @@ if (end > rqend) req->wb_bytes = end - req->wb_offset; - nfs_unlock_request(req); - return req; } @@ -1049,6 +1052,7 @@ goto done; status = 0; + nfs_unlock_request(req); /* If we wrote past the end of the page. * Call the strategy routine so it can send out a bunch * of requests. 
diff -u --recursive --new-file linux-2.4.7/fs/readdir.c linux-2.4.7-rpc_sched/fs/readdir.c --- linux-2.4.7/fs/readdir.c Mon Dec 11 22:45:42 2000 +++ linux-2.4.7-rpc_sched/fs/readdir.c Sat Jul 21 16:59:36 2001 @@ -315,7 +315,8 @@ lastdirent = buf.previous; if (lastdirent) { struct linux_dirent64 d; - d.d_off = file->f_pos; + /* get the sign extension right */ + d.d_off = (off_t)file->f_pos; copy_to_user(&lastdirent->d_off, &d.d_off, sizeof(d.d_off)); error = count - buf.count; } diff -u --recursive --new-file linux-2.4.7/include/linux/dcache.h linux-2.4.7-rpc_sched/include/linux/dcache.h --- linux-2.4.7/include/linux/dcache.h Wed Jul 4 00:42:54 2001 +++ linux-2.4.7-rpc_sched/include/linux/dcache.h Sat Jul 21 17:28:20 2001 @@ -80,6 +80,7 @@ struct super_block * d_sb; /* The root of the dentry tree */ unsigned long d_vfs_flags; void * d_fsdata; /* fs-specific data */ + unsigned long long d_verifier; /* used by nfs d_revalidate */ unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ }; diff -u --recursive --new-file linux-2.4.7/include/linux/lockd/nlm.h linux-2.4.7-rpc_sched/include/linux/lockd/nlm.h --- linux-2.4.7/include/linux/lockd/nlm.h Mon Dec 11 22:25:38 2000 +++ linux-2.4.7-rpc_sched/include/linux/lockd/nlm.h Sat Jul 21 17:49:31 2001 @@ -49,10 +49,10 @@ #define NLMPROC_CANCEL_RES 13 #define NLMPROC_UNLOCK_RES 14 #define NLMPROC_GRANTED_RES 15 +#define NLMPROC_NSM_NOTIFY 16 /* statd callback */ #define NLMPROC_SHARE 20 #define NLMPROC_UNSHARE 21 #define NLMPROC_NM_LOCK 22 #define NLMPROC_FREE_ALL 23 -#define NLMPROC_NSM_NOTIFY 24 /* statd callback */ #endif /* LINUX_LOCKD_NLM_H */ diff -u --recursive --new-file linux-2.4.7/include/linux/nfs_fs.h linux-2.4.7-rpc_sched/include/linux/nfs_fs.h --- linux-2.4.7/include/linux/nfs_fs.h Wed Jul 4 00:43:37 2001 +++ linux-2.4.7-rpc_sched/include/linux/nfs_fs.h Sat Jul 21 17:28:52 2001 @@ -63,6 +63,11 @@ */ #define NFS_SUPER_MAGIC 0x6969 +static inline struct nfs_inode_info *NFS_I(struct inode *inode) +{ + return 
&inode->u.nfs_i; +} + #define NFS_FH(inode) (&(inode)->u.nfs_i.fh) #define NFS_SERVER(inode) (&(inode)->i_sb->u.nfs_sb.s_server) #define NFS_CLIENT(inode) (NFS_SERVER(inode)->client) @@ -74,7 +79,6 @@ #define NFS_READTIME(inode) ((inode)->u.nfs_i.read_cache_jiffies) #define NFS_CACHE_CTIME(inode) ((inode)->u.nfs_i.read_cache_ctime) #define NFS_CACHE_MTIME(inode) ((inode)->u.nfs_i.read_cache_mtime) -#define NFS_CACHE_ATIME(inode) ((inode)->u.nfs_i.read_cache_atime) #define NFS_CACHE_ISIZE(inode) ((inode)->u.nfs_i.read_cache_isize) #define NFS_NEXTSCAN(inode) ((inode)->u.nfs_i.nextscan) #define NFS_CACHEINV(inode) \ @@ -97,8 +101,19 @@ #define NFS_FILEID(inode) ((inode)->u.nfs_i.fileid) #define NFS_FSID(inode) ((inode)->u.nfs_i.fsid) -/* Inode Flags */ -#define NFS_USE_READDIRPLUS(inode) ((NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS) ? 1 : 0) +static inline int nfs_server_caps(struct inode *inode) +{ + return NFS_SERVER(inode)->caps; +} + +static inline int NFS_USE_READDIRPLUS(struct inode *inode) +{ + if (nfs_server_caps(inode) & NFS_CAP_READDIRPLUS) + return 1; + if (NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS) + return 1; + return 0; +} /* * These are the default flags for swap requests diff -u --recursive --new-file linux-2.4.7/include/linux/nfs_fs_i.h linux-2.4.7-rpc_sched/include/linux/nfs_fs_i.h --- linux-2.4.7/include/linux/nfs_fs_i.h Tue Feb 20 02:13:00 2001 +++ linux-2.4.7-rpc_sched/include/linux/nfs_fs_i.h Sat Jul 21 16:58:56 2001 @@ -45,7 +45,6 @@ unsigned long read_cache_jiffies; __u64 read_cache_ctime; __u64 read_cache_mtime; - __u64 read_cache_atime; __u64 read_cache_isize; unsigned long attrtimeo; unsigned long attrtimeo_timestamp; @@ -73,6 +72,9 @@ struct inode *hash_next, *hash_prev; unsigned long nextscan; + + /* Credentials for shared mmap */ + struct rpc_cred *mm_cred; }; /* diff -u --recursive --new-file linux-2.4.7/include/linux/nfs_fs_sb.h linux-2.4.7-rpc_sched/include/linux/nfs_fs_sb.h --- linux-2.4.7/include/linux/nfs_fs_sb.h Wed Apr 26 02:28:56 
2000 +++ linux-2.4.7-rpc_sched/include/linux/nfs_fs_sb.h Sat Jul 21 17:00:27 2001 @@ -8,6 +8,7 @@ struct rpc_clnt * client; /* RPC client handle */ struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ int flags; /* various flags */ + unsigned int caps; /* server capabilities */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ @@ -29,5 +30,9 @@ struct nfs_sb_info { struct nfs_server s_server; }; + +/* Server capabilities */ +#define NFS_CAP_READDIRPLUS 1 + #endif diff -u --recursive --new-file linux-2.4.7/include/linux/sunrpc/auth.h linux-2.4.7-rpc_sched/include/linux/sunrpc/auth.h --- linux-2.4.7/include/linux/sunrpc/auth.h Wed Jul 4 00:42:55 2001 +++ linux-2.4.7-rpc_sched/include/linux/sunrpc/auth.h Sat Jul 21 17:28:52 2001 @@ -14,6 +14,8 @@ #include <linux/config.h> #include <linux/sunrpc/sched.h> +#include <asm/atomic.h> + /* size of the nodename buffer */ #define UNX_MAXNODENAME 32 @@ -22,8 +24,10 @@ */ struct rpc_cred { struct rpc_cred * cr_next; /* linked list */ + struct rpc_auth * cr_auth; + struct rpc_credops * cr_ops; unsigned long cr_expire; /* when to gc */ - unsigned short cr_count; /* ref count */ + atomic_t cr_count; /* ref count */ unsigned short cr_flags; /* various flags */ #ifdef RPC_DEBUG unsigned long cr_magic; /* 0x0f4aa4f0 */ @@ -71,6 +75,9 @@ void (*destroy)(struct rpc_auth *); struct rpc_cred * (*crcreate)(int); +}; + +struct rpc_credops { void (*crdestroy)(struct rpc_cred *); int (*crmatch)(struct rpc_cred *, int); @@ -92,8 +99,7 @@ struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); struct rpc_cred * rpcauth_bindcred(struct rpc_task *); void rpcauth_holdcred(struct rpc_task *); -void rpcauth_releasecred(struct rpc_auth *, - struct rpc_cred *); +void put_rpccred(struct rpc_cred *); void rpcauth_unbindcred(struct rpc_task *); int rpcauth_matchcred(struct rpc_auth *, struct rpc_cred *, int); @@ -106,6 +112,13 @@ void rpcauth_free_credcache(struct rpc_auth *); void rpcauth_insert_credcache(struct rpc_auth *, 
struct rpc_cred *); + +static inline +struct rpc_cred * get_rpccred(struct rpc_cred *cred) +{ + atomic_inc(&cred->cr_count); + return cred; +} #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_AUTH_H */ diff -u --recursive --new-file linux-2.4.7/include/linux/sunrpc/clnt.h linux-2.4.7-rpc_sched/include/linux/sunrpc/clnt.h --- linux-2.4.7/include/linux/sunrpc/clnt.h Wed Jul 4 00:44:05 2001 +++ linux-2.4.7-rpc_sched/include/linux/sunrpc/clnt.h Sat Jul 21 17:49:31 2001 @@ -111,6 +111,8 @@ void rpc_release_client(struct rpc_clnt *); void rpc_getport(struct rpc_task *, struct rpc_clnt *); int rpc_register(u32, u32, int, unsigned short, int *); +u32 * rpc_call_header(struct rpc_task *task); +u32 * rpc_call_verify(struct rpc_task *task); void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); @@ -143,6 +145,11 @@ * Helper function for NFSroot support */ int rpc_getport_external(struct sockaddr_in *, __u32, __u32, int); + +/* + * Ping function + */ +void rpc_ping(struct rpc_task *task); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff -u --recursive --new-file linux-2.4.7/include/linux/sunrpc/xprt.h linux-2.4.7-rpc_sched/include/linux/sunrpc/xprt.h --- linux-2.4.7/include/linux/sunrpc/xprt.h Wed Jul 4 00:44:05 2001 +++ linux-2.4.7-rpc_sched/include/linux/sunrpc/xprt.h Sat Jul 21 17:49:31 2001 @@ -39,12 +39,14 @@ * Come Linux 2.3, we'll handle fragments directly. 
*/ #define RPC_MAXCONG 16 -#define RPC_MAXREQS (RPC_MAXCONG + 1) +#define RPC_MAXREQS (RPC_MAXCONG + 2) #define RPC_CWNDSCALE 256 #define RPC_MAXCWND (RPC_MAXCONG * RPC_CWNDSCALE) #define RPC_INITCWND RPC_CWNDSCALE #define RPCXPRT_CONGESTED(xprt) \ ((xprt)->cong >= (xprt)->cwnd) +#define RPCXPRT_SUPERCONGESTED(xprt) \ + ((xprt)->cwnd < 2*RPC_CWNDSCALE) /* Default timeout values */ #define RPC_MAX_UDP_TIMEOUT (60*HZ) @@ -135,15 +137,14 @@ struct rpc_wait_queue sending; /* requests waiting to send */ struct rpc_wait_queue pending; /* requests in flight */ struct rpc_wait_queue backlog; /* waiting for slot */ - struct rpc_wait_queue reconn; /* waiting for reconnect */ + struct rpc_wait_queue pingwait; /* waiting on ping() */ struct rpc_rqst * free; /* free slots */ struct rpc_rqst slot[RPC_MAXREQS]; unsigned long sockstate; /* Socket state */ unsigned char shutdown : 1, /* being shut down */ nocong : 1, /* no congestion control */ stream : 1, /* TCP */ - tcp_more : 1, /* more record fragments */ - connecting : 1; /* being reconnected */ + tcp_more : 1; /* more record fragments */ /* * State of TCP reply receive stuff @@ -158,6 +159,8 @@ /* * Send stuff */ + rwlock_t sock_lock; /* lock socket info */ + spinlock_t xprt_lock; /* lock xprt info */ struct rpc_task * snd_task; /* Task blocked in send */ @@ -179,18 +182,21 @@ unsigned long); int xprt_reserve(struct rpc_task *); +int xprt_ping_reserve(struct rpc_task *); void xprt_transmit(struct rpc_task *); void xprt_receive(struct rpc_task *); int xprt_adjust_timeout(struct rpc_timeout *); void xprt_release(struct rpc_task *); +void xprt_ping_release(struct rpc_task *); void xprt_reconnect(struct rpc_task *); int xprt_clear_backlog(struct rpc_xprt *); +int xprt_tcp_pending(void); void __rpciod_tcp_dispatcher(void); -extern struct list_head rpc_xprt_pending; - #define XPRT_WSPACE 0 #define XPRT_CONNECT 1 +#define XPRT_PING 2 +#define XPRT_NORESPOND 3 #define xprt_wspace(xp) (test_bit(XPRT_WSPACE, &(xp)->sockstate)) #define 
xprt_test_and_set_wspace(xp) (test_and_set_bit(XPRT_WSPACE, &(xp)->sockstate)) @@ -201,10 +207,30 @@ #define xprt_test_and_set_connected(xp) (test_and_set_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_clear_connected(xp) (clear_bit(XPRT_CONNECT, &(xp)->sockstate)) -static inline -int xprt_tcp_pending(void) +static inline int xprt_pinging(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_PING, &xprt->sockstate); +} +static inline int xprt_test_and_set_pinging(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_PING, &xprt->sockstate); +} +static inline void xprt_clear_pinging(struct rpc_xprt *xprt) +{ + clear_bit(XPRT_PING, &xprt->sockstate); +} + +static inline int xprt_norespond(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_NORESPOND, &xprt->sockstate); +} +static inline int xprt_test_and_set_norespond(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_NORESPOND, &xprt->sockstate); +} +static inline void xprt_clear_norespond(struct rpc_xprt *xprt) { - return !list_empty(&rpc_xprt_pending); + clear_bit(XPRT_NORESPOND, &xprt->sockstate); } static inline diff -u --recursive --new-file linux-2.4.7/kernel/ksyms.c linux-2.4.7-rpc_sched/kernel/ksyms.c --- linux-2.4.7/kernel/ksyms.c Sat Jul 21 16:50:34 2001 +++ linux-2.4.7-rpc_sched/kernel/ksyms.c Sat Jul 21 17:01:32 2001 @@ -472,6 +472,7 @@ EXPORT_SYMBOL(cap_bset); EXPORT_SYMBOL(daemonize); EXPORT_SYMBOL(csum_partial); /* for networking and md */ +EXPORT_SYMBOL(do_sigaction); /* Program loader interfaces */ EXPORT_SYMBOL(setup_arg_pages); diff -u --recursive --new-file linux-2.4.7/net/sunrpc/Makefile linux-2.4.7-rpc_sched/net/sunrpc/Makefile --- linux-2.4.7/net/sunrpc/Makefile Fri Dec 29 23:07:24 2000 +++ linux-2.4.7-rpc_sched/net/sunrpc/Makefile Sat Jul 21 17:02:10 2001 @@ -14,7 +14,7 @@ obj-y := clnt.o xprt.o sched.o \ auth.o auth_null.o auth_unix.o \ svc.o svcsock.o svcauth.o \ - pmap_clnt.o xdr.o sunrpc_syms.o + ping.o pmap_clnt.o xdr.o sunrpc_syms.o obj-$(CONFIG_PROC_FS) += stats.o 
obj-$(CONFIG_SYSCTL) += sysctl.o diff -u --recursive --new-file linux-2.4.7/net/sunrpc/auth.c linux-2.4.7-rpc_sched/net/sunrpc/auth.c --- linux-2.4.7/net/sunrpc/auth.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.7-rpc_sched/net/sunrpc/auth.c Sat Jul 21 16:58:56 2001 @@ -81,42 +81,61 @@ auth->au_nextgc = jiffies + (auth->au_expire >> 1); } +/* + * Destroy an unreferenced credential + */ static inline void -rpcauth_crdestroy(struct rpc_auth *auth, struct rpc_cred *cred) +rpcauth_crdestroy(struct rpc_cred *cred) { #ifdef RPC_DEBUG if (cred->cr_magic != RPCAUTH_CRED_MAGIC) BUG(); cred->cr_magic = 0; + if (atomic_read(&cred->cr_count) || cred->cr_auth) + BUG(); #endif - if (auth->au_ops->crdestroy) - auth->au_ops->crdestroy(cred); - else - rpc_free(cred); + cred->cr_ops->crdestroy(cred); } /* - * Clear the RPC credential cache + * Destroy a list of credentials + */ +static inline +void rpcauth_destroy_credlist(struct rpc_cred *head) +{ + struct rpc_cred *cred; + + while ((cred = head) != NULL) { + head = cred->cr_next; + rpcauth_crdestroy(cred); + } +} + +/* + * Clear the RPC credential cache, and delete those credentials + * that are not referenced. 
*/ void rpcauth_free_credcache(struct rpc_auth *auth) { - struct rpc_cred **q, *cred; - void (*destroy)(struct rpc_cred *); + struct rpc_cred **q, *cred, *free = NULL; int i; - if (!(destroy = auth->au_ops->crdestroy)) - destroy = (void (*)(struct rpc_cred *)) rpc_free; - spin_lock(&rpc_credcache_lock); for (i = 0; i < RPC_CREDCACHE_NR; i++) { q = &auth->au_credcache[i]; while ((cred = *q) != NULL) { *q = cred->cr_next; - destroy(cred); + cred->cr_auth = NULL; + if (atomic_read(&cred->cr_count) == 0) { + cred->cr_next = free; + free = cred; + } else + cred->cr_next = NULL; } } spin_unlock(&rpc_credcache_lock); + rpcauth_destroy_credlist(free); } /* @@ -133,9 +152,10 @@ for (i = 0; i < RPC_CREDCACHE_NR; i++) { q = &auth->au_credcache[i]; while ((cred = *q) != NULL) { - if (!cred->cr_count && + if (!atomic_read(&cred->cr_count) && time_before(cred->cr_expire, jiffies)) { *q = cred->cr_next; + cred->cr_auth = NULL; cred->cr_next = free; free = cred; continue; @@ -144,10 +164,7 @@ } } spin_unlock(&rpc_credcache_lock); - while ((cred = free) != NULL) { - free = cred->cr_next; - rpcauth_crdestroy(auth, cred); - } + rpcauth_destroy_credlist(free); auth->au_nextgc = jiffies + auth->au_expire; } @@ -163,8 +180,8 @@ spin_lock(&rpc_credcache_lock); cred->cr_next = auth->au_credcache[nr]; auth->au_credcache[nr] = cred; - cred->cr_count++; - cred->cr_expire = jiffies + auth->au_expire; + cred->cr_auth = auth; + get_rpccred(cred); spin_unlock(&rpc_credcache_lock); } @@ -187,7 +204,7 @@ q = &auth->au_credcache[nr]; while ((cred = *q) != NULL) { if (!(cred->cr_flags & RPCAUTH_CRED_DEAD) && - auth->au_ops->crmatch(cred, taskflags)) { + cred->cr_ops->crmatch(cred, taskflags)) { *q = cred->cr_next; break; } @@ -213,23 +230,23 @@ * Remove cred handle from cache */ static void -rpcauth_remove_credcache(struct rpc_auth *auth, struct rpc_cred *cred) +rpcauth_remove_credcache(struct rpc_cred *cred) { + struct rpc_auth *auth = cred->cr_auth; struct rpc_cred **q, *cr; int nr; nr = 
(cred->cr_uid & RPC_CREDCACHE_MASK); - spin_lock(&rpc_credcache_lock); q = &auth->au_credcache[nr]; while ((cr = *q) != NULL) { if (cred == cr) { *q = cred->cr_next; cred->cr_next = NULL; + cred->cr_auth = NULL; break; } q = &cred->cr_next; } - spin_unlock(&rpc_credcache_lock); } struct rpc_cred * @@ -258,7 +275,7 @@ { dprintk("RPC: matching %s cred %d\n", auth->au_ops->au_name, taskflags); - return auth->au_ops->crmatch(cred, taskflags); + return cred->cr_ops->crmatch(cred, taskflags); } void @@ -266,26 +283,25 @@ { dprintk("RPC: %4d holding %s cred %p\n", task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred); - if (task->tk_msg.rpc_cred) { - spin_lock(&rpc_credcache_lock); - task->tk_msg.rpc_cred->cr_count++; - task->tk_msg.rpc_cred->cr_expire = jiffies + task->tk_auth->au_expire; - spin_unlock(&rpc_credcache_lock); - } + if (task->tk_msg.rpc_cred) + get_rpccred(task->tk_msg.rpc_cred); } void -rpcauth_releasecred(struct rpc_auth *auth, struct rpc_cred *cred) +put_rpccred(struct rpc_cred *cred) { - spin_lock(&rpc_credcache_lock); - if (cred != NULL && cred->cr_count > 0) { - if (!--cred->cr_count && (cred->cr_flags & RPCAUTH_CRED_DEAD)) { - spin_unlock(&rpc_credcache_lock); - rpcauth_remove_credcache(auth, cred); - rpcauth_crdestroy(auth, cred); - return; - } + if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) + return; + + if (cred->cr_auth && cred->cr_flags & RPCAUTH_CRED_DEAD) + rpcauth_remove_credcache(cred); + + if (!cred->cr_auth) { + spin_unlock(&rpc_credcache_lock); + rpcauth_crdestroy(cred); + return; } + cred->cr_expire = jiffies + cred->cr_auth->au_expire; spin_unlock(&rpc_credcache_lock); } @@ -298,7 +314,7 @@ dprintk("RPC: %4d releasing %s cred %p\n", task->tk_pid, auth->au_ops->au_name, cred); - rpcauth_releasecred(auth, cred); + put_rpccred(cred); task->tk_msg.rpc_cred = NULL; } @@ -306,10 +322,11 @@ rpcauth_marshcred(struct rpc_task *task, u32 *p) { struct rpc_auth *auth = task->tk_auth; + struct rpc_cred *cred = 
task->tk_msg.rpc_cred; dprintk("RPC: %4d marshaling %s cred %p\n", - task->tk_pid, auth->au_ops->au_name, task->tk_msg.rpc_cred); - return auth->au_ops->crmarshal(task, p, + task->tk_pid, auth->au_ops->au_name, cred); + return cred->cr_ops->crmarshal(task, p, task->tk_flags & RPC_CALL_REALUID); } @@ -317,20 +334,22 @@ rpcauth_checkverf(struct rpc_task *task, u32 *p) { struct rpc_auth *auth = task->tk_auth; + struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %4d validating %s cred %p\n", - task->tk_pid, auth->au_ops->au_name, task->tk_msg.rpc_cred); - return auth->au_ops->crvalidate(task, p); + task->tk_pid, auth->au_ops->au_name, cred); + return cred->cr_ops->crvalidate(task, p); } int rpcauth_refreshcred(struct rpc_task *task) { struct rpc_auth *auth = task->tk_auth; + struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %4d refreshing %s cred %p\n", - task->tk_pid, auth->au_ops->au_name, task->tk_msg.rpc_cred); - task->tk_status = auth->au_ops->crrefresh(task); + task->tk_pid, auth->au_ops->au_name, cred); + task->tk_status = cred->cr_ops->crrefresh(task); return task->tk_status; } diff -u --recursive --new-file linux-2.4.7/net/sunrpc/auth_null.c linux-2.4.7-rpc_sched/net/sunrpc/auth_null.c --- linux-2.4.7/net/sunrpc/auth_null.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.7-rpc_sched/net/sunrpc/auth_null.c Sat Jul 21 16:58:56 2001 @@ -17,6 +17,8 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif +static struct rpc_credops null_credops; + static struct rpc_auth * nul_create(struct rpc_clnt *clnt) { @@ -52,9 +54,10 @@ if (!(cred = (struct rpc_cred *) rpc_allocate(flags, sizeof(*cred)))) return NULL; - cred->cr_count = 0; + atomic_set(&cred->cr_count, 0); cred->cr_flags = RPCAUTH_CRED_UPTODATE; cred->cr_uid = current->uid; + cred->cr_ops = &null_credops; return cred; } @@ -124,7 +127,11 @@ #endif nul_create, nul_destroy, - nul_create_cred, + nul_create_cred +}; + +static +struct rpc_credops null_credops = { nul_destroy_cred, nul_match, nul_marshal, diff -u 
--recursive --new-file linux-2.4.7/net/sunrpc/auth_unix.c linux-2.4.7-rpc_sched/net/sunrpc/auth_unix.c --- linux-2.4.7/net/sunrpc/auth_unix.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.7-rpc_sched/net/sunrpc/auth_unix.c Sat Jul 21 16:58:56 2001 @@ -33,6 +33,8 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif +static struct rpc_credops unix_credops; + static struct rpc_auth * unx_create(struct rpc_clnt *clnt) { @@ -71,7 +73,7 @@ if (!(cred = (struct unx_cred *) rpc_allocate(flags, sizeof(*cred)))) return NULL; - cred->uc_count = 0; + atomic_set(&cred->uc_count, 0); cred->uc_flags = RPCAUTH_CRED_UPTODATE; if (flags & RPC_TASK_ROOTCREDS) { cred->uc_uid = cred->uc_fsuid = 0; @@ -91,6 +93,7 @@ if (i < NFS_NGROUPS) cred->uc_gids[i] = NOGROUP; } + cred->uc_base.cr_ops = &unix_credops; return (struct rpc_cred *) cred; } @@ -106,7 +109,7 @@ if (!(cred = (struct unx_cred *) rpc_malloc(task, sizeof(*cred)))) return NULL; - cred->uc_count = 1; + atomic_set(&cred->uc_count, 1); cred->uc_flags = RPCAUTH_CRED_DEAD|RPCAUTH_CRED_UPTODATE; cred->uc_uid = uid; cred->uc_gid = gid; @@ -236,7 +239,11 @@ #endif unx_create, unx_destroy, - unx_create_cred, + unx_create_cred +}; + +static +struct rpc_credops unix_credops = { unx_destroy_cred, unx_match, unx_marshal, diff -u --recursive --new-file linux-2.4.7/net/sunrpc/clnt.c linux-2.4.7-rpc_sched/net/sunrpc/clnt.c --- linux-2.4.7/net/sunrpc/clnt.c Thu Apr 19 17:38:50 2001 +++ linux-2.4.7-rpc_sched/net/sunrpc/clnt.c Sat Jul 21 17:02:10 2001 @@ -55,8 +55,10 @@ static void call_refreshresult(struct rpc_task *task); static void call_timeout(struct rpc_task *task); static void call_reconnect(struct rpc_task *task); -static u32 * call_header(struct rpc_task *task); -static u32 * call_verify(struct rpc_task *task); +static void child_reconnect(struct rpc_task *); +static void child_reconnect_status(struct rpc_task *); +static void call_ping(struct rpc_task *task); +static void call_pingresult(struct rpc_task *task); /* @@ -490,7 +492,7 @@ /* Encode 
header and provided arguments */ encode = rpcproc_encode(clnt, task->tk_msg.rpc_proc); - if (!(p = call_header(task))) { + if (!(p = rpc_call_header(task))) { printk(KERN_INFO "RPC: call_header failed, exit EIO\n"); rpc_exit(task, -EIO); } else @@ -526,6 +528,7 @@ call_reconnect(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_task *child; dprintk("RPC: %4d call_reconnect status %d\n", task->tk_pid, task->tk_status); @@ -533,10 +536,31 @@ task->tk_action = call_transmit; if (task->tk_status < 0 || !clnt->cl_xprt->stream) return; - clnt->cl_stats->netreconn++; + + /* Run as a child to ensure it runs as an rpciod task */ + child = rpc_new_child(clnt, task); + if (child) { + child->tk_action = child_reconnect; + rpc_run_child(task, child, NULL); + } +} + +static void child_reconnect(struct rpc_task *task) +{ + task->tk_client->cl_stats->netreconn++; + task->tk_status = 0; + task->tk_action = child_reconnect_status; xprt_reconnect(task); } +static void child_reconnect_status(struct rpc_task *task) +{ + if (task->tk_status == -EAGAIN) + task->tk_action = child_reconnect; + else + task->tk_action = NULL; +} + /* * 5. 
Transmit the RPC request, and wait for reply */ @@ -595,11 +619,10 @@ task->tk_action = call_reconnect; break; } - /* - * Sleep and dream of an open connection - */ - task->tk_timeout = 5 * HZ; - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + if (RPCXPRT_SUPERCONGESTED(clnt->cl_xprt)) { + task->tk_action = call_ping; + break; + } case -ENOMEM: case -EAGAIN: task->tk_action = call_transmit; @@ -623,6 +646,7 @@ { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; + int major = 0; if (req) { struct rpc_timeout *to = &req->rq_timeout; @@ -643,17 +667,7 @@ rpc_exit(task, -EIO); return; } - if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) { - task->tk_flags |= RPC_CALL_MAJORSEEN; - if (req) - printk(KERN_NOTICE "%s: server %s not responding, still trying\n", - clnt->cl_protname, clnt->cl_server); -#ifdef RPC_DEBUG - else - printk(KERN_NOTICE "%s: task %d can't get a request slot\n", - clnt->cl_protname, task->tk_pid); -#endif - } + major = 1; if (clnt->cl_autobind) clnt->cl_port = 0; @@ -666,6 +680,8 @@ } else if (!xprt_connected(clnt->cl_xprt)) { task->tk_action = call_reconnect; clnt->cl_stats->rpcretrans++; + } else if (major && RPCXPRT_SUPERCONGESTED(clnt->cl_xprt)) { + task->tk_action = call_ping; } else { task->tk_action = call_transmit; clnt->cl_stats->rpcretrans++; @@ -687,12 +703,6 @@ dprintk("RPC: %4d call_decode (status %d)\n", task->tk_pid, task->tk_status); - if (clnt->cl_chatty && (task->tk_flags & RPC_CALL_MAJORSEEN)) { - printk(KERN_NOTICE "%s: server %s OK\n", - clnt->cl_protname, clnt->cl_server); - task->tk_flags &= ~RPC_CALL_MAJORSEEN; - } - if (task->tk_status < 12) { if (!clnt->cl_softrtry) { task->tk_action = call_transmit; @@ -706,7 +716,7 @@ } /* Verify the RPC header */ - if (!(p = call_verify(task))) + if (!(p = rpc_call_verify(task))) return; /* @@ -765,8 +775,8 @@ /* * Call header serialization */ -static u32 * -call_header(struct rpc_task *task) +u32 * +rpc_call_header(struct rpc_task *task) { 
struct rpc_clnt *clnt = task->tk_client; struct rpc_xprt *xprt = clnt->cl_xprt; @@ -786,10 +796,63 @@ } /* + * Ping a non-responding server + */ +static void +call_ping(struct rpc_task *task) +{ + task->tk_action = call_pingresult; + rpc_ping(task); +} + +/* + * Interpret the result from ping + */ +static void +call_pingresult(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = clnt->cl_xprt; + int status = task->tk_status; + + task->tk_status = 0; + if (status >= 0) { + task->tk_action = call_transmit; + return; + } + + switch(status) { + case -ECONNREFUSED: + case -ENOTCONN: + if (clnt->cl_autobind || !clnt->cl_port) { + clnt->cl_port = 0; + task->tk_action = call_bind; + break; + } + if (xprt->stream) { + task->tk_action = call_reconnect; + break; + } + case -ENOMEM: + case -ENOBUFS: + rpc_delay(task, HZ >> 4); + case -ETIMEDOUT: + task->tk_action = call_ping; + break; + default: + if (clnt->cl_chatty) + printk("%s: RPC call returned error %d\n", + clnt->cl_protname, -status); + rpc_exit(task,status); + return; + } +} + +/* * Reply header verification */ -static u32 * -call_verify(struct rpc_task *task) +u32 * +rpc_call_verify(struct rpc_task *task) { u32 *p = task->tk_rqstp->rq_rvec[0].iov_base, n; diff -u --recursive --new-file linux-2.4.7/net/sunrpc/ping.c linux-2.4.7-rpc_sched/net/sunrpc/ping.c --- linux-2.4.7/net/sunrpc/ping.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.7-rpc_sched/net/sunrpc/ping.c Sat Jul 21 17:02:10 2001 @@ -0,0 +1,218 @@ +/* + * linux/net/sunrpc/ping.c + * + * Ping routing. 
+ * + * Copyright (C) 2000, Trond Myklebust + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define RPC_SLACK_SPACE 512 /* total overkill */ +#define RPC_PING_DELAY (15*HZ) + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_XPRT +#endif + +static void ping_call_reserve(struct rpc_task *); +static void ping_call_allocate(struct rpc_task *); +static void ping_call_encode(struct rpc_task *); +static void ping_call_transmit(struct rpc_task *); +static void ping_call_receive(struct rpc_task *); +static void ping_call_exit(struct rpc_task *); + + +static void +ping_call_reserve(struct rpc_task *task) +{ + dprintk("RPC: %4d, ping_call_reserve\n", task->tk_pid); + task->tk_status = 0; + task->tk_action = ping_call_allocate; + task->tk_timeout = task->tk_client->cl_timeout.to_resrvval; + xprt_ping_reserve(task); +} + +static void +ping_call_allocate(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct rpc_rqst *req = task->tk_rqstp; + unsigned int bufsiz; + + dprintk("RPC: %4d, ping_call_allocate (status %d)\n", + task->tk_pid, task->tk_status); + + task->tk_action = ping_call_exit; + if (task->tk_status < 0) + return; + + bufsiz = rpcproc_bufsiz(clnt, task->tk_msg.rpc_proc) + RPC_SLACK_SPACE; + if (!(task->tk_buffer = rpc_malloc(task, bufsiz << 1))) { + task->tk_status = -ENOMEM; + return; + } + req->rq_svec[0].iov_base = (void *)task->tk_buffer; + req->rq_svec[0].iov_len = bufsiz; + req->rq_slen = 0; + req->rq_snr = 1; + req->rq_rvec[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); + req->rq_rvec[0].iov_len = bufsiz; + req->rq_rlen = bufsiz; + req->rq_rnr = 1; + task->tk_action = ping_call_encode; +} + +static void +ping_call_encode(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + u32 *p; + + dprintk("RPC: %4d, ping_call_encode (status %d)\n", + task->tk_pid, task->tk_status); + + if (task->tk_status < 0) { + task->tk_action = ping_call_exit; + return; + 
} + p = rpc_call_header(task); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + task->tk_action = ping_call_transmit; +} + +static void +ping_call_transmit(struct rpc_task *task) +{ + dprintk("RPC: %4d, ping_call_transmit\n", task->tk_pid); + task->tk_action = ping_call_receive; + xprt_transmit(task); +} + +static void +ping_call_receive(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_timeout *to = &req->rq_timeout; + u32 *p; + + dprintk("RPC: %4d, ping_call_receive (status %d)\n", + task->tk_pid, task->tk_status); + + if (task->tk_status >= 0) + p = rpc_call_verify(task); + + task->tk_action = ping_call_exit; + + if (task->tk_status >= 0 || task->tk_status == -EACCES) { + task->tk_status = 0; + if (xprt_norespond(xprt)) { + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s OK\n", + clnt->cl_protname, clnt->cl_server); + xprt_clear_norespond(xprt); + } + return; + } + + switch (task->tk_status) { + case -ENOTCONN: + break; + case -ENOMEM: + case -EAGAIN: + case -ECONNREFUSED: + case -ETIMEDOUT: + if (!xprt_adjust_timeout(to)) { + task->tk_status = 0; + task->tk_action = ping_call_transmit; + break; + } + default: + if (clnt->cl_softrtry) { + task->tk_status = -EIO; + break; + } + if (clnt->cl_chatty) { + if (!xprt_test_and_set_norespond(xprt)) { + printk(KERN_NOTICE + "%s: server %s is not responding\n", + clnt->cl_protname, clnt->cl_server); + } else { + printk(KERN_NOTICE + "%s: server %s still not responding\n", + clnt->cl_protname, clnt->cl_server); + } + } + rpc_delay(task, RPC_PING_DELAY); + } +} + +static void +ping_call_exit(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + + dprintk("RPC: %4d, ping_call_exit (status %d)\n", + task->tk_pid, task->tk_status); + + task->tk_action = NULL; + xprt_ping_release(task); + + /* Sigh. 
rpc_delay() clears task->tk_status */ + if (task->tk_status == 0 && xprt_norespond(xprt)) + task->tk_status = -ETIMEDOUT; + + xprt_clear_pinging(xprt); + rpc_wake_up_status(&xprt->pingwait, task->tk_status); +} + +void +rpc_ping(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_task *child; + struct rpc_message msg = {0, NULL, NULL, NULL}; + + dprintk("RPC: %4d, rpc_ping\n", task->tk_pid); + + again: + if (xprt_test_and_set_pinging(xprt)) { + rpc_sleep_on(&xprt->pingwait, task, NULL, 0); + if (!xprt_pinging(xprt)) { + rpc_wake_up_task(task); + goto again; + } + dprintk("RPC: %4d, rpc_ping, waiting on completion\n", + task->tk_pid); + return; + } + + child = rpc_new_child(clnt, task); + if (!child) { + dprintk("RPC: %4d, rpc_ping, failed to create child process\n", + task->tk_pid); + xprt_clear_pinging(xprt); + rpc_wake_up_status(&xprt->pingwait, -ENOMEM); + task->tk_status = -ENOMEM; + return; + } + rpc_call_setup(child, &msg, 0); + child->tk_action = ping_call_reserve; + + dprintk("RPC: %4d, rpc_ping, running child process %4d\n", + task->tk_pid, child->tk_pid); + rpc_run_child(task, child, NULL); +} diff -u --recursive --new-file linux-2.4.7/net/sunrpc/sched.c linux-2.4.7-rpc_sched/net/sunrpc/sched.c --- linux-2.4.7/net/sunrpc/sched.c Tue Apr 3 22:45:37 2001 +++ linux-2.4.7-rpc_sched/net/sunrpc/sched.c Sat Jul 21 17:02:37 2001 @@ -30,7 +30,7 @@ /* * We give RPC the same get_free_pages priority as NFS */ -#define GFP_RPC GFP_NFS +#define GFP_RPC GFP_NOFS static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); @@ -744,7 +744,7 @@ * for readahead): * * sync user requests: GFP_KERNEL - * async requests: GFP_RPC (== GFP_NFS) + * async requests: GFP_RPC (== GFP_NOFS) * swap requests: GFP_ATOMIC (or new GFP_SWAPPER) */ void * @@ -772,8 +772,8 @@ } if (flags & RPC_TASK_ASYNC) return NULL; - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ>>4); + 
current->policy |= SCHED_YIELD; + schedule(); } while (!signalled()); return NULL; @@ -1072,8 +1072,6 @@ current->pgrp = 1; strcpy(current->comm, "rpciod"); - current->flags |= PF_MEMALLOC; - dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid); while (rpciod_users) { if (signalled()) { @@ -1120,8 +1118,8 @@ __rpc_schedule(); if (all_tasks) { dprintk("rpciod_killall: waiting for tasks to exit\n"); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); + current->policy |= SCHED_YIELD; + schedule(); } } @@ -1152,7 +1150,7 @@ /* * Create the rpciod thread and wait for it to start. */ - error = kernel_thread(rpciod, &rpciod_killer, 0); + error = kernel_thread(rpciod, &rpciod_killer, CLONE_SIGNAL); if (error < 0) { printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error); rpciod_users--; @@ -1191,8 +1189,8 @@ * wait briefly before checking the process id. */ current->sigpending = 0; - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); + current->policy |= SCHED_YIELD; + schedule(); /* * Display a message if we're going to wait longer. 
*/ diff -u --recursive --new-file linux-2.4.7/net/sunrpc/sunrpc_syms.c linux-2.4.7-rpc_sched/net/sunrpc/sunrpc_syms.c --- linux-2.4.7/net/sunrpc/sunrpc_syms.c Fri Jan 12 00:53:02 2001 +++ linux-2.4.7-rpc_sched/net/sunrpc/sunrpc_syms.c Sat Jul 21 16:58:56 2001 @@ -65,7 +65,7 @@ EXPORT_SYMBOL(rpcauth_lookupcred); EXPORT_SYMBOL(rpcauth_bindcred); EXPORT_SYMBOL(rpcauth_matchcred); -EXPORT_SYMBOL(rpcauth_releasecred); +EXPORT_SYMBOL(put_rpccred); /* RPC server stuff */ EXPORT_SYMBOL(svc_create); diff -u --recursive --new-file linux-2.4.7/net/sunrpc/svc.c linux-2.4.7-rpc_sched/net/sunrpc/svc.c --- linux-2.4.7/net/sunrpc/svc.c Thu Jun 21 02:42:19 2001 +++ linux-2.4.7-rpc_sched/net/sunrpc/svc.c Sat Jul 21 17:01:32 2001 @@ -136,7 +136,7 @@ serv->sv_nrthreads++; rqstp->rq_server = serv; - error = kernel_thread((int (*)(void *)) func, rqstp, 0); + error = kernel_thread((int (*)(void *)) func, rqstp, CLONE_SIGNAL); if (error < 0) goto out_thread; error = 0; diff -u --recursive --new-file linux-2.4.7/net/sunrpc/xprt.c linux-2.4.7-rpc_sched/net/sunrpc/xprt.c --- linux-2.4.7/net/sunrpc/xprt.c Thu Apr 12 21:11:39 2001 +++ linux-2.4.7-rpc_sched/net/sunrpc/xprt.c Sat Jul 21 17:02:10 2001 @@ -75,10 +75,6 @@ * Local variables */ -/* Spinlock for critical sections in the code. 
*/ -spinlock_t xprt_sock_lock = SPIN_LOCK_UNLOCKED; -spinlock_t xprt_lock = SPIN_LOCK_UNLOCKED; - #ifdef RPC_DEBUG # undef RPC_DEBUG_DATA # define RPCDBG_FACILITY RPCDBG_XPRT @@ -94,7 +90,7 @@ */ static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static void do_xprt_transmit(struct rpc_task *); -static void xprt_reserve_status(struct rpc_task *task); +static void xprt_alloc_slot(struct rpc_xprt *, struct rpc_task *); static void xprt_disconnect(struct rpc_xprt *); static void xprt_reconn_status(struct rpc_task *task); static struct socket *xprt_create_socket(int, struct rpc_timeout *); @@ -177,6 +173,44 @@ } /* + * Serialize write access to sockets, in order to prevent different + * requests from interfering with each other. + * Also prevents TCP socket reconnections from colliding with writes. + */ +static int +xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) +{ + int retval; + write_lock_bh(&xprt->sock_lock); + if (!xprt->snd_task) + xprt->snd_task = task; + else if (xprt->snd_task != task) { + dprintk("RPC: %4d TCP write queue full (task %d)\n", + task->tk_pid, xprt->snd_task->tk_pid); + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + rpc_sleep_on(&xprt->sending, task, NULL, NULL); + } + retval = xprt->snd_task == task; + write_unlock_bh(&xprt->sock_lock); + return retval; +} + +/* + * Releases the socket for use by other requests. + */ +static void +xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) +{ + write_lock_bh(&xprt->sock_lock); + if (xprt->snd_task == task) { + xprt->snd_task = NULL; + rpc_wake_up_next(&xprt->sending); + } + write_unlock_bh(&xprt->sock_lock); +} + +/* * Write data to socket. 
*/ static inline int @@ -290,7 +324,10 @@ if (xprt->nocong) return; - spin_lock_bh(&xprt_sock_lock); + /* + * Note: we're in a BH context + */ + spin_lock(&xprt->xprt_lock); cwnd = xprt->cwnd; if (result >= 0) { if (xprt->cong < cwnd || time_before(jiffies, xprt->congtime)) @@ -318,7 +355,7 @@ xprt->cwnd = cwnd; out: - spin_unlock_bh(&xprt_sock_lock); + spin_unlock(&xprt->xprt_lock); } /* @@ -399,6 +436,8 @@ /* * Reconnect a broken TCP connection. + * + * Note: This cannot collide with the TCP reads, as both run from rpciod */ void xprt_reconnect(struct rpc_task *task) @@ -421,15 +460,10 @@ return; } - spin_lock(&xprt_lock); - if (xprt->connecting) { - task->tk_timeout = 0; - rpc_sleep_on(&xprt->reconn, task, NULL, NULL); - spin_unlock(&xprt_lock); + if (!xprt_lock_write(xprt, task)) return; - } - xprt->connecting = 1; - spin_unlock(&xprt_lock); + if (xprt_connected(xprt)) + goto out_write; status = -ENOTCONN; if (!inet) { @@ -444,6 +478,7 @@ /* Reset TCP record info */ xprt->tcp_offset = 0; + xprt->tcp_reclen = 0; xprt->tcp_copied = 0; xprt->tcp_more = 0; @@ -472,24 +507,22 @@ dprintk("RPC: %4d connect status %d connected %d\n", task->tk_pid, status, xprt_connected(xprt)); - spin_lock_bh(&xprt_sock_lock); + write_lock_bh(&xprt->sock_lock); if (!xprt_connected(xprt)) { task->tk_timeout = xprt->timeout.to_maxval; - rpc_sleep_on(&xprt->reconn, task, xprt_reconn_status, NULL); - spin_unlock_bh(&xprt_sock_lock); + rpc_sleep_on(&xprt->sending, task, xprt_reconn_status, NULL); + write_unlock_bh(&xprt->sock_lock); return; } - spin_unlock_bh(&xprt_sock_lock); + write_unlock_bh(&xprt->sock_lock); } defer: - spin_lock(&xprt_lock); - xprt->connecting = 0; if (status < 0) { rpc_delay(task, 5*HZ); task->tk_status = -ENOTCONN; } - rpc_wake_up(&xprt->reconn); - spin_unlock(&xprt_lock); + out_write: + xprt_release_write(xprt, task); } /* @@ -504,10 +537,7 @@ dprintk("RPC: %4d xprt_reconn_timeout %d\n", task->tk_pid, task->tk_status); - spin_lock(&xprt_lock); - xprt->connecting = 
0; - rpc_wake_up(&xprt->reconn); - spin_unlock(&xprt_lock); + xprt_release_write(xprt, task); } /* @@ -704,10 +734,6 @@ struct iovec riov; int want, result; - if (xprt->tcp_offset >= xprt->tcp_reclen + sizeof(xprt->tcp_recm)) { - xprt->tcp_offset = 0; - xprt->tcp_reclen = 0; - } if (xprt->tcp_offset >= sizeof(xprt->tcp_recm)) goto done; @@ -723,10 +749,6 @@ want -= result; } while (want); - /* Is this another fragment in the last message */ - if (!xprt->tcp_more) - xprt->tcp_copied = 0; /* No, so we're reading a new message */ - /* Get the record length and mask out the last fragment bit */ xprt->tcp_reclen = ntohl(xprt->tcp_recm); xprt->tcp_more = (xprt->tcp_reclen & 0x80000000) ? 0 : 1; @@ -848,14 +870,15 @@ /* Read in a new fragment marker if necessary */ /* Can we ever really expect to get completely empty fragments? */ - if ((result = tcp_read_fraghdr(xprt)) <= 0) + if ((result = tcp_read_fraghdr(xprt)) < 0) return result; avail = result; /* Read in the xid if necessary */ - if ((result = tcp_read_xid(xprt, avail)) <= 0) + if ((result = tcp_read_xid(xprt, avail)) < 0) return result; - avail = result; + if (!(avail = result)) + goto out_ok; /* Find and lock the request corresponding to this xid */ req = xprt_lookup_rqst(xprt, xprt->tcp_xid); @@ -873,9 +896,14 @@ if ((result = tcp_read_discard(xprt, avail)) < 0) return result; + out_ok: dprintk("RPC: tcp_input_record done (off %d reclen %d copied %d)\n", xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_copied); result = xprt->tcp_reclen; + xprt->tcp_reclen = 0; + xprt->tcp_offset = 0; + if (!xprt->tcp_more) + xprt->tcp_copied = 0; return result; } @@ -890,11 +918,19 @@ rpciod_wake_up(); } +int xprt_tcp_pending(void) +{ + int retval; + + spin_lock_bh(&rpc_queue_lock); + retval = !list_empty(&rpc_xprt_pending); + spin_unlock_bh(&rpc_queue_lock); + return retval; +} + static inline void xprt_append_pending(struct rpc_xprt *xprt) { - if (!list_empty(&xprt->rx_pending)) - return; spin_lock_bh(&rpc_queue_lock); if 
(list_empty(&xprt->rx_pending)) { list_add(&xprt->rx_pending, rpc_xprt_pending.prev); @@ -1008,11 +1044,10 @@ case TCP_ESTABLISHED: if (xprt_test_and_set_connected(xprt)) break; - spin_lock_bh(&xprt_sock_lock); + read_lock(&xprt->sock_lock); if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending) rpc_wake_up_task(xprt->snd_task); - rpc_wake_up(&xprt->reconn); - spin_unlock_bh(&xprt_sock_lock); + read_unlock(&xprt->sock_lock); break; case TCP_SYN_SENT: case TCP_SYN_RECV: @@ -1046,10 +1081,10 @@ return; if (!xprt_test_and_set_wspace(xprt)) { - spin_lock_bh(&xprt_sock_lock); + read_lock(&xprt->sock_lock); if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending) rpc_wake_up_task(xprt->snd_task); - spin_unlock_bh(&xprt_sock_lock); + read_unlock(&xprt->sock_lock); } if (test_bit(SOCK_NOSPACE, &sock->flags)) { @@ -1076,10 +1111,10 @@ return; if (!xprt_test_and_set_wspace(xprt)) { - spin_lock_bh(&xprt_sock_lock); + read_lock(&xprt->sock_lock); if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending) rpc_wake_up_task(xprt->snd_task); - spin_unlock_bh(&xprt_sock_lock); + read_unlock(&xprt->sock_lock); } if (sk->sleep && waitqueue_active(sk->sleep)) @@ -1105,55 +1140,6 @@ rpc_wake_up_task(task); } - -/* - * Serialize access to sockets, in order to prevent different - * requests from interfering with each other. 
- */ -static int -xprt_down_transmit(struct rpc_task *task) -{ - struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - struct rpc_rqst *req = task->tk_rqstp; - - spin_lock_bh(&xprt_sock_lock); - spin_lock(&xprt_lock); - if (xprt->snd_task && xprt->snd_task != task) { - dprintk("RPC: %4d TCP write queue full (task %d)\n", - task->tk_pid, xprt->snd_task->tk_pid); - task->tk_timeout = 0; - task->tk_status = -EAGAIN; - rpc_sleep_on(&xprt->sending, task, NULL, NULL); - } else if (!xprt->snd_task) { - xprt->snd_task = task; -#ifdef RPC_PROFILE - req->rq_xtime = jiffies; -#endif - req->rq_bytes_sent = 0; - } - spin_unlock(&xprt_lock); - spin_unlock_bh(&xprt_sock_lock); - return xprt->snd_task == task; -} - -/* - * Releases the socket for use by other requests. - */ -static inline void -xprt_up_transmit(struct rpc_task *task) -{ - struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - - if (xprt->snd_task && xprt->snd_task == task) { - spin_lock_bh(&xprt_sock_lock); - spin_lock(&xprt_lock); - xprt->snd_task = NULL; - rpc_wake_up_next(&xprt->sending); - spin_unlock(&xprt_lock); - spin_unlock_bh(&xprt_sock_lock); - } -} - /* * Place the actual RPC call. * We have to copy the iovec because sendmsg fiddles with its contents. 
@@ -1187,9 +1173,12 @@ *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); } - if (!xprt_down_transmit(task)) + if (!xprt_lock_write(xprt, task)) return; +#ifdef RPC_PROFILE + req->rq_xtime = jiffies; +#endif do_xprt_transmit(task); } @@ -1257,22 +1246,18 @@ switch (status) { case -ENOMEM: /* Protect against (udp|tcp)_write_space */ - spin_lock_bh(&xprt_sock_lock); + write_lock_bh(&xprt->sock_lock); if (!xprt_wspace(xprt)) { task->tk_timeout = req->rq_timeout.to_current; rpc_sleep_on(&xprt->sending, task, NULL, NULL); } - spin_unlock_bh(&xprt_sock_lock); - return; + write_unlock_bh(&xprt->sock_lock); case -EAGAIN: - /* Keep holding the socket if it is blocked */ - rpc_delay(task, HZ>>4); return; - case -ECONNREFUSED: - case -ENOTCONN: - if (!xprt->stream) - return; default: + if (xprt->stream) + xprt_disconnect(xprt); + req->rq_bytes_sent = 0; goto out_release; } @@ -1283,7 +1268,7 @@ rpc_add_timer(task, xprt_timer); rpc_unlock_task(task); out_release: - xprt_up_transmit(task); + xprt_release_write(xprt, task); } /* @@ -1318,10 +1303,12 @@ dprintk("RPC: %4d xprt_reserve cong = %ld cwnd = %ld\n", task->tk_pid, xprt->cong, xprt->cwnd); - spin_lock_bh(&xprt_sock_lock); - xprt_reserve_status(task); + spin_lock_bh(&xprt->xprt_lock); + if (!RPCXPRT_CONGESTED(xprt)) + xprt_alloc_slot(xprt, task); if (task->tk_rqstp) { task->tk_timeout = 0; + xprt->cong += RPC_CWNDSCALE; } else if (!task->tk_timeout) { task->tk_status = -ENOBUFS; } else { @@ -1329,42 +1316,55 @@ task->tk_status = -EAGAIN; rpc_sleep_on(&xprt->backlog, task, NULL, NULL); } - spin_unlock_bh(&xprt_sock_lock); + spin_unlock_bh(&xprt->xprt_lock); dprintk("RPC: %4d xprt_reserve returns %d\n", task->tk_pid, task->tk_status); return task->tk_status; } /* - * Reservation callback + * Reserve a ping RPC call slot. 
*/ -static void -xprt_reserve_status(struct rpc_task *task) +int +xprt_ping_reserve(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - struct rpc_rqst *req; - if (xprt->shutdown) { - task->tk_status = -EIO; - } else if (task->tk_status < 0) { - /* NOP */ - } else if (task->tk_rqstp) { - /* We've already been given a request slot: NOP */ - } else { - if (RPCXPRT_CONGESTED(xprt) || !(req = xprt->free)) - goto out_nofree; - /* OK: There's room for us. Grab a free slot and bump - * congestion value */ - xprt->free = req->rq_next; - req->rq_next = NULL; - xprt->cong += RPC_CWNDSCALE; - task->tk_rqstp = req; - xprt_request_init(task, xprt); + /* We already have an initialized request. */ + if (task->tk_rqstp) + return 0; - if (xprt->free) - xprt_clear_backlog(xprt); - } + dprintk("RPC: %4d xprt_ping_reserve cong = %ld cwnd = %ld\n", + task->tk_pid, xprt->cong, xprt->cwnd); + spin_lock_bh(&xprt->xprt_lock); + xprt_alloc_slot(xprt, task); + if (!task->tk_rqstp) + task->tk_status = -ENOBUFS; + spin_unlock_bh(&xprt->xprt_lock); + dprintk("RPC: %4d xprt_ping_reserve returns %d\n", + task->tk_pid, task->tk_status); + return task->tk_status; +} + +/* + * Reserve a slot + */ +static void +xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) +{ + struct rpc_rqst *req; + + if (!(req = xprt->free)) + goto out_nofree; + /* OK: There's room for us. 
Grab a free slot and bump + * congestion value */ + xprt->free = req->rq_next; + req->rq_next = NULL; + task->tk_rqstp = req; + xprt_request_init(task, xprt); + if (xprt->free) + xprt_clear_backlog(xprt); return; out_nofree: @@ -1396,13 +1396,17 @@ /* * Release an RPC call slot */ -void -xprt_release(struct rpc_task *task) +static void +__xprt_release(struct rpc_task *task, int congvalue) { struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req; - xprt_up_transmit(task); + if (xprt->snd_task == task) { + if (xprt->stream) + xprt_disconnect(xprt); + xprt_release_write(xprt, task); + } if (!(req = task->tk_rqstp)) return; task->tk_rqstp = NULL; @@ -1416,15 +1420,28 @@ rpc_remove_wait_queue(task); } - spin_lock_bh(&xprt_sock_lock); + spin_lock_bh(&xprt->xprt_lock); req->rq_next = xprt->free; xprt->free = req; - /* Decrease congestion value. */ - xprt->cong -= RPC_CWNDSCALE; + if (congvalue) { + /* Decrease congestion value. */ + xprt->cong -= congvalue; + xprt_clear_backlog(xprt); + } + spin_unlock_bh(&xprt->xprt_lock); +} + +void +xprt_release(struct rpc_task *task) +{ + __xprt_release(task, RPC_CWNDSCALE); +} - xprt_clear_backlog(xprt); - spin_unlock_bh(&xprt_sock_lock); +void +xprt_ping_release(struct rpc_task *task) +{ + __xprt_release(task, 0); } /* @@ -1481,6 +1498,8 @@ } else xprt->cwnd = RPC_INITCWND; xprt->congtime = jiffies; + rwlock_init(&xprt->sock_lock); + spin_lock_init(&xprt->xprt_lock); init_waitqueue_head(&xprt->cong_wait); /* Set timeout parameters */ @@ -1494,7 +1513,7 @@ xprt->pending = RPC_INIT_WAITQ("xprt_pending"); xprt->sending = RPC_INIT_WAITQ("xprt_sending"); xprt->backlog = RPC_INIT_WAITQ("xprt_backlog"); - xprt->reconn = RPC_INIT_WAITQ("xprt_reconn"); + xprt->pingwait= RPC_INIT_WAITQ("xprt_pingwait"); /* initialize free list */ for (i = 0, req = xprt->slot; i < RPC_MAXREQS-1; i++, req++) @@ -1630,7 +1649,7 @@ rpc_wake_up(&xprt->sending); rpc_wake_up(&xprt->pending); rpc_wake_up(&xprt->backlog); - rpc_wake_up(&xprt->reconn); + 
rpc_wake_up(&xprt->pingwait); if (waitqueue_active(&xprt->cong_wait)) wake_up(&xprt->cong_wait); }