diff -u --recursive --new-file linux-2.4.10/fs/lockd/clntlock.c linux-2.4.10-tune/fs/lockd/clntlock.c --- linux-2.4.10/fs/lockd/clntlock.c Mon Sep 24 00:33:20 2001 +++ linux-2.4.10-tune/fs/lockd/clntlock.c Wed Sep 26 16:28:13 2001 @@ -8,6 +8,7 @@ #define __KERNEL_SYSCALLS__ +#include #include #include #include @@ -17,7 +18,7 @@ #include #include -#define NLMDBG_FACILITY NLMDBG_CIENT +#define NLMDBG_FACILITY NLMDBG_CLIENT /* * Local function prototypes @@ -131,29 +132,63 @@ */ /* + * Mark the locks for reclaiming. + * FIXME: In 2.5 we don't want to iterate through any global file_lock_list. + * Maintain NLM lock reclaiming lists in the nlm_host instead. + */ +static +void nlmclnt_mark_reclaim(struct nlm_host *host) +{ + struct file_lock *fl; + struct inode *inode; + struct list_head *tmp; + + list_for_each(tmp, &file_lock_list) { + fl = list_entry(tmp, struct file_lock, fl_link); + + inode = fl->fl_file->f_dentry->d_inode; + if (inode->i_sb->s_magic != NFS_SUPER_MAGIC) + continue; + if (fl->fl_u.nfs_fl.host != host) + continue; + if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) + continue; + fl->fl_u.nfs_fl.flags |= NFS_LCK_RECLAIM; + } +} + +/* + * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number, + * that we mark locks for reclaiming, and that we bump the pseudo NSM state. + */ +static inline +void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate) +{ + host->h_monitored = 0; + host->h_nsmstate = newstate; + host->h_state++; + host->h_nextrebind = 0; + nlm_rebind_host(host); + nlmclnt_mark_reclaim(host); + dprintk("NLM: reclaiming locks for host %s", host->h_name); +} + +/* * Reclaim all locks on server host. We do this by spawning a separate * reclaimer thread. - * FIXME: should bump MOD_USE_COUNT while reclaiming */ void nlmclnt_recovery(struct nlm_host *host, u32 newstate) { - if (!host->h_reclaiming++) { + if (host->h_reclaiming++) { if (host->h_nsmstate == newstate) return; - printk(KERN_WARNING - "lockd: Uh-oh! Interfering reclaims for host %s", - host->h_name); - host->h_monitored = 0; - host->h_nsmstate = newstate; - host->h_state++; - nlm_release_host(host); + nlmclnt_prepare_reclaim(host, newstate); } else { - host->h_monitored = 0; - host->h_nsmstate = newstate; - host->h_state++; + nlmclnt_prepare_reclaim(host, newstate); nlm_get_host(host); - kernel_thread(reclaimer, host, 0); + MOD_INC_USE_COUNT; + kernel_thread(reclaimer, host, CLONE_SIGNAL); } } @@ -163,32 +198,38 @@ struct nlm_host *host = (struct nlm_host *) ptr; struct nlm_wait *block; struct list_head *tmp; + struct file_lock *fl; + struct inode *inode; + daemonize(); reparent_to_init(); snprintf(current->comm, sizeof(current->comm), "%s-reclaim", host->h_name); - + /* This one ensures that our parent doesn't terminate while the * reclaim is in progress */ lock_kernel(); lockd_up(); - /* First, reclaim all locks that have been granted previously. */ + /* First, reclaim all locks that have been marked. */ restart: - tmp = file_lock_list.next; - while (tmp != &file_lock_list) { - struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); - struct inode *inode = fl->fl_file->f_dentry->d_inode; - if (inode->i_sb->s_magic == NFS_SUPER_MAGIC && - nlm_cmp_addr(NFS_ADDR(inode), &host->h_addr) && - fl->fl_u.nfs_fl.state != host->h_state && - (fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) { - fl->fl_u.nfs_fl.flags &= ~ NFS_LCK_GRANTED; - nlmclnt_reclaim(host, fl); /* This sleeps */ - goto restart; - } - tmp = tmp->next; + list_for_each(tmp, &file_lock_list) { + fl = list_entry(tmp, struct file_lock, fl_link); + + inode = fl->fl_file->f_dentry->d_inode; + if (inode->i_sb->s_magic != NFS_SUPER_MAGIC) + continue; + if (fl->fl_u.nfs_fl.host != host) + continue; + if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_RECLAIM)) + continue; + + fl->fl_u.nfs_fl.flags &= ~NFS_LCK_RECLAIM; + nlmclnt_reclaim(host, fl); + if (signalled()) + break; + goto restart; } host->h_reclaiming = 0; @@ -206,6 +247,7 @@ nlm_release_host(host); lockd_down(); unlock_kernel(); + MOD_DEC_USE_COUNT; return 0; } diff -u --recursive --new-file linux-2.4.10/fs/lockd/host.c linux-2.4.10-tune/fs/lockd/host.c --- linux-2.4.10/fs/lockd/host.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.10-tune/fs/lockd/host.c Mon Sep 24 22:31:22 2001 @@ -51,7 +51,8 @@ struct nlm_host * nlmsvc_lookup_host(struct svc_rqst *rqstp) { - return nlm_lookup_host(rqstp->rq_client, &rqstp->rq_addr, 0, 0); + return nlm_lookup_host(rqstp->rq_client, &rqstp->rq_addr, + rqstp->rq_prot, rqstp->rq_vers); } /* @@ -97,7 +98,9 @@ nlm_gc_hosts(); for (hp = &nlm_hosts[hash]; (host = *hp); hp = &host->h_next) { - if (host->h_version != version || host->h_proto != proto) + if (proto && host->h_proto != proto) + continue; + if (version && host->h_version != version) continue; if (nlm_match_host(host, clnt, sin)) { @@ -325,7 +328,8 @@ } dprintk("lockd: delete host %s\n", host->h_name); *q = host->h_next; - if (host->h_monitored) + /* Don't unmonitor hosts that have been invalidated */ + if (host->h_monitored && !host->h_killed) nsm_unmonitor(host); if ((clnt = host->h_rpcclnt) != NULL) { if (atomic_read(&clnt->cl_users)) { diff -u --recursive --new-file linux-2.4.10/fs/lockd/lockd_syms.c linux-2.4.10-tune/fs/lockd/lockd_syms.c --- linux-2.4.10/fs/lockd/lockd_syms.c Thu Mar 9 15:57:08 2000 +++ linux-2.4.10-tune/fs/lockd/lockd_syms.c Tue Sep 25 11:51:15 2001 @@ -35,8 +35,4 @@ EXPORT_SYMBOL(nlmsvc_invalidate_client); EXPORT_SYMBOL(nlmsvc_ops); -/* Configuration at insmod time */ -EXPORT_SYMBOL(nlmsvc_grace_period); -EXPORT_SYMBOL(nlmsvc_timeout); - #endif /* CONFIG_MODULES */ diff -u --recursive --new-file linux-2.4.10/fs/lockd/mon.c linux-2.4.10-tune/fs/lockd/mon.c --- linux-2.4.10/fs/lockd/mon.c Tue Jun 12 04:15:27 2001 +++ linux-2.4.10-tune/fs/lockd/mon.c Mon Sep 24 00:37:22 2001 @@ -43,7 +43,7 @@ args.addr = host->h_addr.sin_addr.s_addr; args.prog = NLM_PROGRAM; - args.vers = 1; + args.vers = host->h_version; args.proc = NLMPROC_NSM_NOTIFY; memset(res, 0, sizeof(*res)); diff -u --recursive --new-file linux-2.4.10/fs/lockd/svc.c linux-2.4.10-tune/fs/lockd/svc.c --- linux-2.4.10/fs/lockd/svc.c Mon Sep 24 00:33:20 2001 +++ linux-2.4.10-tune/fs/lockd/svc.c Thu Sep 27 01:29:34 2001 @@ -43,7 +43,7 @@ static DECLARE_MUTEX(nlmsvc_sema); static unsigned int nlmsvc_users; static pid_t nlmsvc_pid; -unsigned long nlmsvc_grace_period; +int nlmsvc_grace_period; unsigned long nlmsvc_timeout; static DECLARE_MUTEX_LOCKED(lockd_start); @@ -56,6 +56,25 @@ unsigned long nlm_grace_period; unsigned long nlm_timeout = LOCKD_DFLT_TIMEO; +static unsigned long set_grace_period(void) +{ + unsigned long grace_period; + + /* Note: nlm_timeout should always be nonzero */ + if (nlm_grace_period) + grace_period = ((nlm_grace_period + nlm_timeout - 1) + / nlm_timeout) * nlm_timeout * HZ; + else + grace_period = nlm_timeout * 5 * HZ; + nlmsvc_grace_period = 1; + return grace_period + jiffies; +} + +static inline void clear_grace_period(void) +{ + nlmsvc_grace_period = 0; +} + /* * This is the lockd kernel thread */ @@ -84,7 +103,7 @@ spin_lock_irq(¤t->sigmask_lock); siginitsetinv(¤t->blocked, sigmask(SIGKILL)); recalc_sigpending(current); - spin_unlock_irq(¤t->sigmask_lock); + spin_unlock_irq(¤t->sigmask_lock); /* kick rpciod */ rpciod_up(); @@ -93,21 +112,10 @@ if (!nlm_timeout) nlm_timeout = LOCKD_DFLT_TIMEO; - -#ifdef RPC_DEBUG - nlmsvc_grace_period = 10 * HZ; -#else - if (nlm_grace_period) { - nlmsvc_grace_period += (1 + nlm_grace_period / nlm_timeout) - * nlm_timeout * HZ; - } else { - nlmsvc_grace_period += 5 * nlm_timeout * HZ; - } -#endif - - grace_period_expire = nlmsvc_grace_period + jiffies; nlmsvc_timeout = nlm_timeout * HZ; + grace_period_expire = set_grace_period(); + /* * The main request loop. We don't terminate until the last * NFS mount or NFS daemon has gone away, and we've been sent a @@ -122,13 +130,7 @@ spin_unlock_irq(¤t->sigmask_lock); if (nlmsvc_ops) { nlmsvc_ops->detach(); -#ifdef RPC_DEBUG - nlmsvc_grace_period = 10 * HZ; -#else - nlmsvc_grace_period += 5 * nlm_timeout * HZ; - -#endif - grace_period_expire = nlmsvc_grace_period + jiffies; + grace_period_expire = set_grace_period(); } } @@ -140,16 +142,15 @@ */ if (!nlmsvc_grace_period) { timeout = nlmsvc_retry_blocked(); - } else if (time_before(nlmsvc_grace_period, jiffies)) - nlmsvc_grace_period = 0; + } else if (time_before(grace_period_expire, jiffies)) + clear_grace_period(); /* * Find a socket with data available and call its * recvfrom routine. */ - if ((err = svc_recv(serv, rqstp, timeout)) == -EAGAIN - || err == -EINTR - ) + err = svc_recv(serv, rqstp, timeout); + if (err == -EAGAIN || err == -EINTR) continue; if (err < 0) { printk(KERN_WARNING @@ -345,7 +346,7 @@ * Define NLM program and procedures */ static struct svc_version nlmsvc_version1 = { - 1, 16, nlmsvc_procedures, NULL + 1, 17, nlmsvc_procedures, NULL }; static struct svc_version nlmsvc_version3 = { 3, 24, nlmsvc_procedures, NULL diff -u --recursive --new-file linux-2.4.10/fs/lockd/svc4proc.c linux-2.4.10-tune/fs/lockd/svc4proc.c --- linux-2.4.10/fs/lockd/svc4proc.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.10-tune/fs/lockd/svc4proc.c Thu Sep 27 01:23:33 2001 @@ -420,6 +420,8 @@ void *resp) { struct sockaddr_in saddr = rqstp->rq_addr; + int vers = rqstp->rq_vers; + int prot = rqstp->rq_prot; struct nlm_host *host; dprintk("lockd: SM_NOTIFY called\n"); @@ -435,8 +437,8 @@ /* Obtain the host pointer for this NFS server and try to * reclaim all locks we hold on this server. */ - saddr.sin_addr.s_addr = argp->addr; - if ((host = nlm_lookup_host(NULL, &saddr, IPPROTO_UDP, 1)) != NULL) { + saddr.sin_addr.s_addr = argp->addr; + if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) { nlmclnt_recovery(host, argp->state); nlm_release_host(host); } @@ -444,7 +446,7 @@ /* If we run on an NFS server, delete all locks held by the client */ if (nlmsvc_ops != NULL) { struct svc_client *clnt; - saddr.sin_addr.s_addr = argp->addr; + saddr.sin_addr.s_addr = argp->addr; if ((clnt = nlmsvc_ops->exp_getclient(&saddr)) != NULL && (host = nlm_lookup_host(clnt, &saddr, 0, 0)) != NULL) { nlmsvc_free_host_resources(host); @@ -549,7 +551,8 @@ PROC(cancel_res, cancelres, norep, res, void), PROC(unlock_res, unlockres, norep, res, void), PROC(granted_res, grantedres, norep, res, void), - PROC(none, void, void, void, void), + /* statd callback */ + PROC(sm_notify, reboot, void, reboot, void), PROC(none, void, void, void, void), PROC(none, void, void, void, void), PROC(none, void, void, void, void), @@ -558,6 +561,4 @@ PROC(nm_lock, lockargs, res, args, res), PROC(free_all, notify, void, args, void), - /* statd callback */ - PROC(sm_notify, reboot, void, reboot, void), }; diff -u --recursive --new-file linux-2.4.10/fs/lockd/svcproc.c linux-2.4.10-tune/fs/lockd/svcproc.c --- linux-2.4.10/fs/lockd/svcproc.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.10-tune/fs/lockd/svcproc.c Thu Sep 27 01:24:20 2001 @@ -445,6 +445,8 @@ void *resp) { struct sockaddr_in saddr = rqstp->rq_addr; + int vers = rqstp->rq_vers; + int prot = rqstp->rq_prot; struct nlm_host *host; dprintk("lockd: SM_NOTIFY called\n"); @@ -460,8 +462,8 @@ /* Obtain the host pointer for this NFS server and try to * reclaim all locks we hold on this server. */ - saddr.sin_addr.s_addr = argp->addr; - if ((host = nlm_lookup_host(NULL, &saddr, IPPROTO_UDP, 1)) != NULL) { + saddr.sin_addr.s_addr = argp->addr; + if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) { nlmclnt_recovery(host, argp->state); nlm_release_host(host); } @@ -574,7 +576,8 @@ PROC(cancel_res, cancelres, norep, res, void), PROC(unlock_res, unlockres, norep, res, void), PROC(granted_res, grantedres, norep, res, void), - PROC(none, void, void, void, void), + /* statd callback */ + PROC(sm_notify, reboot, void, reboot, void), PROC(none, void, void, void, void), PROC(none, void, void, void, void), PROC(none, void, void, void, void), @@ -583,6 +586,4 @@ PROC(nm_lock, lockargs, res, args, res), PROC(free_all, notify, void, args, void), - /* statd callback */ - PROC(sm_notify, reboot, void, reboot, void), }; diff -u --recursive --new-file linux-2.4.10/fs/lockd/svcsubs.c linux-2.4.10-tune/fs/lockd/svcsubs.c --- linux-2.4.10/fs/lockd/svcsubs.c Sat Jun 24 06:12:53 2000 +++ linux-2.4.10-tune/fs/lockd/svcsubs.c Mon Sep 24 22:13:59 2001 @@ -305,6 +305,7 @@ dprintk("lockd: invalidating client for %s\n", host->h_name); nlmsvc_free_host_resources(host); host->h_expires = 0; + host->h_killed = 1; nlm_release_host(host); } } diff -u --recursive --new-file linux-2.4.10/fs/lockd/xdr.c linux-2.4.10-tune/fs/lockd/xdr.c --- linux-2.4.10/fs/lockd/xdr.c Mon Sep 24 00:33:20 2001 +++ linux-2.4.10-tune/fs/lockd/xdr.c Tue Sep 25 10:27:31 2001 @@ -367,7 +367,8 @@ if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN))) return 0; argp->state = ntohl(*p++); - argp->addr = ntohl(*p++); + /* Preserve the address in network byte order */ + argp->addr = *p++; return xdr_argsize_check(rqstp, p); } diff -u --recursive --new-file linux-2.4.10/fs/lockd/xdr4.c linux-2.4.10-tune/fs/lockd/xdr4.c --- linux-2.4.10/fs/lockd/xdr4.c Mon Sep 24 00:33:20 2001 +++ linux-2.4.10-tune/fs/lockd/xdr4.c Tue Sep 25 11:03:10 2001 @@ -373,7 +373,8 @@ if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN))) return 0; argp->state = ntohl(*p++); - argp->addr = ntohl(*p++); + /* Preserve the address in network byte order */ + argp->addr = *p++; return xdr_argsize_check(rqstp, p); } diff -u --recursive --new-file linux-2.4.10/fs/namei.c linux-2.4.10-tune/fs/namei.c --- linux-2.4.10/fs/namei.c Mon Sep 24 00:33:20 2001 +++ linux-2.4.10-tune/fs/namei.c Mon Sep 24 00:36:35 2001 @@ -443,7 +443,7 @@ while (*name=='/') name++; if (!*name) - goto return_base; + goto return_reval; inode = nd->dentry->d_inode; if (current->link_count) @@ -562,7 +562,7 @@ inode = nd->dentry->d_inode; /* fallthrough */ case 1: - goto return_base; + goto return_reval; } if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { err = nd->dentry->d_op->d_hash(nd->dentry, &this); @@ -613,6 +613,17 @@ nd->last_type = LAST_DOT; else if (this.len == 2 && this.name[1] == '.') nd->last_type = LAST_DOTDOT; +return_reval: + /* + * We bypassed the ordinary revalidation routines, so + * NFS wants to check the cached inode for staleness. + */ + inode = nd->dentry->d_inode; + if (inode && inode->i_op && inode->i_op->check_stale) { + err = inode->i_op->check_stale(inode); + if (err) + break; + } return_base: return 0; out_dput: diff -u --recursive --new-file linux-2.4.10/fs/nfs/dir.c linux-2.4.10-tune/fs/nfs/dir.c --- linux-2.4.10/fs/nfs/dir.c Tue Jun 12 20:15:08 2001 +++ linux-2.4.10-tune/fs/nfs/dir.c Tue Sep 25 18:52:42 2001 @@ -34,8 +34,11 @@ #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ +static loff_t nfs_dir_llseek(struct file *, loff_t, int); static int nfs_readdir(struct file *, void *, filldir_t); static struct dentry *nfs_lookup(struct inode *, struct dentry *); +static int nfs_cached_lookup(struct inode *, struct dentry *, + struct nfs_fh *, struct nfs_fattr *); static int nfs_create(struct inode *, struct dentry *, int); static int nfs_mkdir(struct inode *, struct dentry *, int); static int nfs_rmdir(struct inode *, struct dentry *); @@ -47,6 +50,7 @@ struct inode *, struct dentry *); struct file_operations nfs_dir_operations = { + llseek: nfs_dir_llseek, read: generic_read_dir, readdir: nfs_readdir, open: nfs_open, @@ -66,8 +70,28 @@ permission: nfs_permission, revalidate: nfs_revalidate, setattr: nfs_notify_change, + check_stale: nfs_check_stale, }; +static loff_t nfs_dir_llseek(struct file *file, loff_t offset, int origin) +{ + switch (origin) { + case 1: + if (offset == 0) { + offset = file->f_pos; + break; + } + case 2: + return -EINVAL; + } + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_reada = 0; + file->f_version = ++event; + } + return (offset <= 0) ? 0 : offset; +} + typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int); typedef struct { struct file *file; @@ -108,13 +132,15 @@ error = NFS_PROTO(inode)->readdir(inode, cred, desc->entry->cookie, buffer, NFS_SERVER(inode)->dtsize, desc->plus); /* We requested READDIRPLUS, but the server doesn't grok it */ - if (desc->plus && error == -ENOTSUPP) { - NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; - desc->plus = 0; - goto again; - } - if (error < 0) + if (error < 0) { + if (error == -ENOTSUPP && desc->plus) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; + NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + desc->plus = 0; + goto again; + } goto error; + } SetPageUptodate(page); kunmap(page); /* Ensure consistent page alignment of the data. @@ -195,7 +221,6 @@ dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index); - desc->plus = NFS_USE_READDIRPLUS(inode); page = read_cache_page(&inode->i_data, desc->page_index, (filler_t *)nfs_readdir_filler, desc); if (IS_ERR(page)) { @@ -247,6 +272,24 @@ return res; } +static unsigned int nfs_type2dtype[] = { + DT_UNKNOWN, + DT_REG, + DT_DIR, + DT_BLK, + DT_CHR, + DT_LNK, + DT_SOCK, + DT_UNKNOWN, + DT_FIFO +}; + +static inline +unsigned int nfs_type_to_d_type(enum nfs_ftype type) +{ + return nfs_type2dtype[type]; +} + /* * Once we've found the start of the dirent within a page: fill 'er up... */ @@ -263,11 +306,17 @@ dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target); for(;;) { + unsigned d_type = DT_UNKNOWN; /* Note: entry->prev_cookie contains the cookie for * retrieving the current dirent on the server */ fileid = nfs_fileid_to_ino_t(entry->ino); + + /* Use readdirplus info */ + if (desc->plus && (entry->fattr->valid & NFS_ATTR_FATTR)) + d_type = nfs_type_to_d_type(entry->fattr->type); + res = filldir(dirent, entry->name, entry->len, - entry->prev_cookie, fileid, DT_UNKNOWN); + entry->prev_cookie, fileid, d_type); if (res < 0) break; file->f_pos = desc->target = entry->cookie; @@ -334,7 +383,8 @@ /* Reset read descriptor so it searches the page cache from * the start upon the next call to readdir_search_pagecache() */ desc->page_index = 0; - memset(desc->entry, 0, sizeof(*desc->entry)); + desc->entry->cookie = desc->entry->prev_cookie = 0; + desc->entry->eof = 0; out: dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status); return status; @@ -353,9 +403,11 @@ nfs_readdir_descriptor_t my_desc, *desc = &my_desc; struct nfs_entry my_entry; + struct nfs_fh fh; + struct nfs_fattr fattr; long res; - res = nfs_revalidate(dentry); + res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res < 0) return res; @@ -366,12 +418,16 @@ * itself. */ memset(desc, 0, sizeof(*desc)); - memset(&my_entry, 0, sizeof(my_entry)); - desc->file = filp; desc->target = filp->f_pos; - desc->entry = &my_entry; desc->decode = NFS_PROTO(inode)->decode_dirent; + desc->plus = NFS_USE_READDIRPLUS(inode); + + my_entry.cookie = my_entry.prev_cookie = 0; + my_entry.eof = 0; + my_entry.fh = &fh; + my_entry.fattr = &fattr; + desc->entry = &my_entry; while(!desc->entry->eof) { res = readdir_search_pagecache(desc); @@ -401,6 +457,27 @@ return 0; } +static inline +void nfs_renew_verifier(struct inode *dir, struct dentry *dentry) +{ + dentry->d_verifier = NFS_CACHE_MTIME(dir); +} + +/* + * A check for whether or not the parent directory has changed. + * In the case it has, we assume that the dentries are untrustworthy + * and may need to be looked up again. + */ +static inline +int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +{ + if (IS_ROOT(dentry)) + return 1; + if (nfs_revalidate_inode(NFS_SERVER(dir), dir)) + return 0; + return dentry->d_verifier == NFS_CACHE_MTIME(dir); +} + /* * Whenever an NFS operation succeeds, we know that the dentry * is valid, so we update the revalidation timestamp. @@ -408,50 +485,34 @@ static inline void nfs_renew_times(struct dentry * dentry) { dentry->d_time = jiffies; + nfs_renew_verifier(dentry->d_parent->d_inode, dentry); } -static inline int nfs_dentry_force_reval(struct dentry *dentry, int flags) +static inline +int nfs_lookup_verify_inode(struct inode *inode, int flags) { - struct inode *inode = dentry->d_inode; - unsigned long timeout = NFS_ATTRTIMEO(inode); - + struct nfs_server *server = NFS_SERVER(inode); /* - * If it's the last lookup in a series, we use a stricter - * cache consistency check by looking at the parent mtime. - * - * If it's been modified in the last hour, be really strict. - * (This still means that we can avoid doing unnecessary - * work on directories like /usr/share/bin etc which basically - * never change). + * If we're interested in close-to-open cache consistency, + * then we revalidate the inode upon lookup. */ - if (!(flags & LOOKUP_CONTINUE)) { - long diff = CURRENT_TIME - dentry->d_parent->d_inode->i_mtime; - - if (diff < 15*60) - timeout = 0; - } - - return time_after(jiffies,dentry->d_time + timeout); + if (!(server->flags & NFS_MOUNT_NOCTO) && !(flags & LOOKUP_CONTINUE)) + NFS_CACHEINV(inode); + return nfs_revalidate_inode(server, inode); } /* * We judge how long we want to trust negative * dentries by looking at the parent inode mtime. * - * If mtime is close to present time, we revalidate - * more often. + * If parent mtime has changed, we revalidate, else we wait for a + * period corresponding to the parent's attribute cache timeout value. */ -#define NFS_REVALIDATE_NEGATIVE (1 * HZ) -static inline int nfs_neg_need_reval(struct dentry *dentry) +static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry) { - struct inode *dir = dentry->d_parent->d_inode; - unsigned long timeout = NFS_ATTRTIMEO(dir); - long diff = CURRENT_TIME - dir->i_mtime; - - if (diff < 5*60 && timeout > NFS_REVALIDATE_NEGATIVE) - timeout = NFS_REVALIDATE_NEGATIVE; - - return time_after(jiffies, dentry->d_time + timeout); + if (!nfs_check_verifier(dir, dentry)) + return 1; + return time_after(jiffies, dentry->d_time + NFS_ATTRTIMEO(dir)); } /* @@ -462,9 +523,8 @@ * NOTE! The hit can be a negative hit too, don't assume * we have an inode! * - * If the dentry is older than the revalidation interval, - * we do a new lookup and verify that the dentry is still - * correct. + * If the parent directory is seen to have changed, we throw out the + * cached dentry and do a new lookup. */ static int nfs_lookup_revalidate(struct dentry * dentry, int flags) { @@ -477,13 +537,9 @@ lock_kernel(); dir = dentry->d_parent->d_inode; inode = dentry->d_inode; - /* - * If we don't have an inode, let's look at the parent - * directory mtime to get a hint about how often we - * should validate things.. - */ + if (!inode) { - if (nfs_neg_need_reval(dentry)) + if (nfs_neg_need_reval(dir, dentry)) goto out_bad; goto out_valid; } @@ -494,48 +550,49 @@ goto out_bad; } - if (!nfs_dentry_force_reval(dentry, flags)) + /* Force a full look up iff the parent directory has changed */ + if (nfs_check_verifier(dir, dentry)) { + if (nfs_lookup_verify_inode(inode, flags)) + goto out_bad; goto out_valid; + } - if (IS_ROOT(dentry)) { - __nfs_revalidate_inode(NFS_SERVER(inode), inode); + error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); + if (!error) { + if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) + goto out_bad; + if (nfs_lookup_verify_inode(inode, flags)) + goto out_bad; goto out_valid_renew; } - /* - * Do a new lookup and check the dentry attributes. - */ + if (NFS_STALE(inode)) + goto out_bad; + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); if (error) goto out_bad; - - /* Inode number matches? */ - if (!(fattr.valid & NFS_ATTR_FATTR) || - NFS_FSID(inode) != fattr.fsid || - NFS_FILEID(inode) != fattr.fileid) + if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) goto out_bad; - - /* Ok, remember that we successfully checked it.. */ - nfs_refresh_inode(inode, &fattr); - - if (nfs_inode_is_stale(inode, &fhandle, &fattr)) + if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; out_valid_renew: nfs_renew_times(dentry); -out_valid: + out_valid: unlock_kernel(); return 1; -out_bad: - shrink_dcache_parent(dentry); - /* If we have submounts, don't unhash ! */ - if (have_submounts(dentry)) - goto out_valid; - d_drop(dentry); - /* Purge readdir caches. */ - nfs_zap_caches(dir); - if (inode && S_ISDIR(inode->i_mode)) + out_bad: + NFS_CACHEINV(dir); + if (inode && S_ISDIR(inode->i_mode)) { + /* Purge readdir caches. */ nfs_zap_caches(inode); + /* If we have submounts, don't unhash ! */ + if (have_submounts(dentry)) + goto out_valid; + shrink_dcache_parent(dentry); + } + d_drop(dentry); unlock_kernel(); return 0; } @@ -594,6 +651,20 @@ error = -ENOMEM; dentry->d_op = &nfs_dentry_operations; + error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); + if (!error) { + error = -EACCES; + inode = nfs_fhget(dentry, &fhandle, &fattr); + if (inode) { + if (!(NFS_SERVER(dir)->flags & NFS_MOUNT_NOCTO)) + NFS_CACHEINV(inode); + d_add(dentry, inode); + nfs_renew_times(dentry); + error = 0; + } + goto out; + } + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); inode = NULL; if (error == -ENOENT) @@ -604,12 +675,83 @@ if (inode) { no_entry: d_add(dentry, inode); - nfs_renew_times(dentry); error = 0; } + nfs_renew_times(dentry); } out: return ERR_PTR(error); +} + +static inline +int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry) +{ + struct nfs_entry *entry = desc->entry; + int status; + + while((status = dir_decode(desc)) == 0) { + if (entry->len != dentry->d_name.len) + continue; + if (memcmp(entry->name, dentry->d_name.name, entry->len)) + continue; + if (!(entry->fattr->valid & NFS_ATTR_FATTR)) + continue; + break; + } + return status; +} + +/* + * Use the cached Readdirplus results in order to avoid a LOOKUP call + * whenever we believe that the parent directory has not changed. + * + * We assume that any file creation/rename changes the directory mtime. + * As this results in a page cache invalidation whenever it occurs, + * we don't require any other tests for cache coherency. + */ +static +int nfs_cached_lookup(struct inode *dir, struct dentry *dentry, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + nfs_readdir_descriptor_t desc; + struct nfs_server *server; + struct nfs_entry entry; + struct page *page; + int res; + + if (!NFS_USE_READDIRPLUS(dir)) + return -ENOENT; + server = NFS_SERVER(dir); + if (server->flags & NFS_MOUNT_NOAC) + return -ENOENT; + nfs_revalidate_inode(server, dir); + + entry.fh = fh; + entry.fattr = fattr; + + desc.decode = NFS_PROTO(dir)->decode_dirent; + desc.entry = &entry; + desc.page_index = 0; + desc.plus = 1; + + for(;(page = find_get_page(&dir->i_data, desc.page_index)); desc.page_index++) { + + res = -EIO; + if (Page_Uptodate(page)) { + desc.ptr = kmap(page); + res = find_dirent_name(&desc, page, dentry); + kunmap(page); + } + page_cache_release(page); + + if (res == 0) + goto out_found; + if (res != -EAGAIN) + break; + } + return -ENOENT; + out_found: + return 0; } /* diff -u --recursive --new-file linux-2.4.10/fs/nfs/flushd.c linux-2.4.10-tune/fs/nfs/flushd.c --- linux-2.4.10/fs/nfs/flushd.c Wed Jun 27 23:02:29 2001 +++ linux-2.4.10-tune/fs/nfs/flushd.c Mon Sep 24 00:38:16 2001 @@ -47,6 +47,8 @@ */ #define NFSDBG_FACILITY NFSDBG_PAGECACHE +#define NFS_SCAN_RESO (30*HZ) + /* * This is the wait queue all cluster daemons sleep on */ @@ -118,7 +120,7 @@ cache->task->tk_status = -ENOMEM; rpc_wake_up_task(cache->task); } - interruptible_sleep_on_timeout(&cache->request_wait, 1 * HZ); + interruptible_sleep_on_timeout(&cache->request_wait, NFS_SCAN_RESO); } out: unlock_kernel(); @@ -205,7 +207,7 @@ lock_kernel(); if (time_after(NFS_NEXTSCAN(inode), time)) NFS_NEXTSCAN(inode) = time; - mintimeout = jiffies + 1 * HZ; + mintimeout = jiffies + NFS_SCAN_RESO; if (time_before(mintimeout, NFS_NEXTSCAN(inode))) mintimeout = NFS_NEXTSCAN(inode); inode_append_flushd(inode); @@ -263,8 +265,8 @@ } dprintk("NFS: %4d flushd back to sleep\n", task->tk_pid); - if (time_after(jiffies + 1 * HZ, delay)) - delay = 1 * HZ; + if (time_after(jiffies + NFS_SCAN_RESO, delay)) + delay = NFS_SCAN_RESO; else delay = delay - jiffies; task->tk_status = 0; diff -u --recursive --new-file linux-2.4.10/fs/nfs/inode.c linux-2.4.10-tune/fs/nfs/inode.c --- linux-2.4.10/fs/nfs/inode.c Mon Sep 24 00:33:20 2001 +++ linux-2.4.10-tune/fs/nfs/inode.c Mon Sep 24 00:36:54 2001 @@ -312,6 +312,7 @@ if (data->flags & NFS_MOUNT_NOAC) { data->acregmin = data->acregmax = 0; data->acdirmin = data->acdirmax = 0; + sb->s_flags |= MS_SYNCHRONOUS; } server->acregmin = data->acregmin*HZ; server->acregmax = data->acregmax*HZ; @@ -323,6 +324,7 @@ if (!server->hostname) goto out_unlock; strcpy(server->hostname, data->hostname); + server->caps = 0; nfsv3_try_again: /* Check NFS protocol revision and initialize RPC op vector @@ -331,6 +333,7 @@ #ifdef CONFIG_NFS_V3 server->rpc_ops = &nfs_v3_clientops; version = 3; + server->caps |= NFS_CAP_READDIRPLUS; if (data->version < 4) { printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); goto out_unlock; @@ -577,50 +580,49 @@ nfs_zap_caches(inode); } +/* Don't use READDIRPLUS on directories that we believe are too large */ +#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE) + /* * Fill in inode information from the fattr. */ static void nfs_fill_inode(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr) { - /* - * Check whether the mode has been set, as we only want to - * do this once. (We don't allow inodes to change types.) + NFS_FILEID(inode) = fattr->fileid; + NFS_FSID(inode) = fattr->fsid; + inode->i_mode = fattr->mode; + /* Why so? Because we want revalidate for devices/FIFOs, and + * that's precisely what we have in nfs_file_inode_operations. */ - if (inode->i_mode == 0) { - NFS_FILEID(inode) = fattr->fileid; - NFS_FSID(inode) = fattr->fsid; - inode->i_mode = fattr->mode; - /* Why so? Because we want revalidate for devices/FIFOs, and - * that's precisely what we have in nfs_file_inode_operations. - */ - inode->i_op = &nfs_file_inode_operations; - if (S_ISREG(inode->i_mode)) { - inode->i_fop = &nfs_file_operations; - inode->i_data.a_ops = &nfs_file_aops; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &nfs_dir_inode_operations; - inode->i_fop = &nfs_dir_operations; - } else if (S_ISLNK(inode->i_mode)) - inode->i_op = &nfs_symlink_inode_operations; - else - init_special_inode(inode, inode->i_mode, fattr->rdev); - /* - * Preset the size and mtime, as there's no need - * to invalidate the caches. - */ - inode->i_size = nfs_size_to_loff_t(fattr->size); - inode->i_mtime = nfs_time_to_secs(fattr->mtime); - inode->i_atime = nfs_time_to_secs(fattr->atime); - inode->i_ctime = nfs_time_to_secs(fattr->ctime); - NFS_CACHE_CTIME(inode) = fattr->ctime; - NFS_CACHE_MTIME(inode) = fattr->mtime; - NFS_CACHE_ISIZE(inode) = fattr->size; - NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); - NFS_ATTRTIMEO_UPDATE(inode) = jiffies; - memcpy(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)); - } - nfs_refresh_inode(inode, fattr); + inode->i_op = &nfs_file_inode_operations; + if (S_ISREG(inode->i_mode)) { + inode->i_fop = &nfs_file_operations; + inode->i_data.a_ops = &nfs_file_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &nfs_dir_inode_operations; + inode->i_fop = &nfs_dir_operations; + if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) + && fattr->size <= NFS_LIMIT_READDIRPLUS) + NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; + } else if (S_ISLNK(inode->i_mode)) + inode->i_op = &nfs_symlink_inode_operations; + else + init_special_inode(inode, inode->i_mode, fattr->rdev); + /* + * Preset the size and mtime, as there's no need + * to invalidate the caches. + */ + inode->i_size = nfs_size_to_loff_t(fattr->size); + inode->i_mtime = nfs_time_to_secs(fattr->mtime); + inode->i_atime = nfs_time_to_secs(fattr->atime); + inode->i_ctime = nfs_time_to_secs(fattr->ctime); + NFS_CACHE_CTIME(inode) = fattr->ctime; + NFS_CACHE_MTIME(inode) = fattr->mtime; + NFS_CACHE_ISIZE(inode) = fattr->size; + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); + NFS_ATTRTIMEO_UPDATE(inode) = jiffies; + memcpy(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)); } struct nfs_find_desc { @@ -650,27 +652,6 @@ return 1; } -int -nfs_inode_is_stale(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr) -{ - /* Empty inodes are not stale */ - if (!inode->i_mode) - return 0; - - if ((fattr->mode & S_IFMT) != (inode->i_mode & S_IFMT)) - return 1; - - if (is_bad_inode(inode) || NFS_STALE(inode)) - return 1; - - /* Has the filehandle changed? If so is the old one stale? */ - if (memcmp(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)) != 0 && - __nfs_revalidate_inode(NFS_SERVER(inode),inode) == -ESTALE) - return 1; - - return 0; -} - /* * This is our own version of iget that looks up inodes by file handle * instead of inode number. We use this technique instead of using @@ -713,7 +694,19 @@ if (!(inode = iget4(sb, ino, nfs_find_actor, &desc))) goto out_no_inode; - nfs_fill_inode(inode, fh, fattr); + /* + * Check whether the mode has been set, as we only want to + * do this once. (We don't allow inodes to change types.) + */ + if (inode->i_mode == 0) { + nfs_fill_inode(inode, fh, fattr); + nfs_refresh_inode(inode, fattr); + + /* We don't trust READDIRPLUS attributes */ + if (fattr->valid & NFS_ATTR_RDPLUS) + NFS_CACHEINV(inode); + } else if (!(fattr->valid & NFS_ATTR_RDPLUS)) + nfs_refresh_inode(inode, fattr); dprintk("NFS: __nfs_fhget(%x/%Ld ct=%d)\n", inode->i_dev, (long long)NFS_FILEID(inode), atomic_read(&inode->i_count)); @@ -736,7 +729,7 @@ /* * Make sure the inode is up-to-date. */ - error = nfs_revalidate(dentry); + error = nfs_revalidate_inode(NFS_SERVER(inode),inode); if (error) { #ifdef NFS_PARANOIA printk("nfs_notify_change: revalidate failed, error=%d\n", error); @@ -805,6 +798,21 @@ { struct inode *inode = dentry->d_inode; return nfs_revalidate_inode(NFS_SERVER(inode), inode); +} + +/* + * Another revalidation function: This one checks inodes for staleness + * when we've bypassed the ordinary dcache revalidation routines. + * e.g. open(".") + */ +int +nfs_check_stale(struct inode *inode) +{ + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO)) + NFS_CACHEINV(inode); + if (NFS_STALE(inode)) + return -ESTALE; + return 0; } /* diff -u --recursive --new-file linux-2.4.10/fs/nfs/nfs2xdr.c linux-2.4.10-tune/fs/nfs/nfs2xdr.c --- linux-2.4.10/fs/nfs/nfs2xdr.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.10-tune/fs/nfs/nfs2xdr.c Mon Sep 24 00:36:17 2001 @@ -419,7 +419,7 @@ bufsiz = bufsiz >> 2; p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->cookie); + *p++ = htonl(args->cookie & 0xFFFFFFFF); *p++ = htonl(bufsiz); /* see above */ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); @@ -504,7 +504,7 @@ entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); entry->prev_cookie = entry->cookie; - entry->cookie = ntohl(*p++); + entry->cookie = (s64)((off_t)ntohl(*p++)); entry->eof = !p[0] && p[1]; return p; diff -u --recursive --new-file linux-2.4.10/fs/nfs/nfs3proc.c linux-2.4.10-tune/fs/nfs/nfs3proc.c --- linux-2.4.10/fs/nfs/nfs3proc.c Mon Dec 4 03:01:01 2000 +++ linux-2.4.10-tune/fs/nfs/nfs3proc.c Tue Sep 25 18:48:04 2001 @@ -80,7 +80,8 @@ status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR, fhandle, fattr, 0); dprintk("NFS reply lookup: %d\n", status); - nfs_refresh_inode(dir, &dir_attr); + if (status >= 0) + status = nfs_refresh_inode(dir, &dir_attr); return status; } @@ -362,8 +363,8 @@ struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs_fattr dir_attr; - struct nfs3_createargs arg = { NFS_FH(dir), name->name, name->len, - sattr, 0, { 0, 0 } }; + struct nfs3_mkdirargs arg = { NFS_FH(dir), name->name, name->len, + sattr }; struct nfs3_diropres res = { &dir_attr, fhandle, fattr }; int status; @@ -477,6 +478,9 @@ if (status < 0) goto error; status = rpc_call(server->client, NFS3PROC_FSINFO, fhandle, info, 0); + if (status < 0) + goto error; + status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0); error: dprintk("NFS reply statfs: %d\n", status); diff -u --recursive --new-file linux-2.4.10/fs/nfs/nfs3xdr.c linux-2.4.10-tune/fs/nfs/nfs3xdr.c --- linux-2.4.10/fs/nfs/nfs3xdr.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.10-tune/fs/nfs/nfs3xdr.c Mon Sep 24 00:36:54 2001 @@ -523,6 +523,13 @@ return 0; } +/* Hack to sign-extending 32-bit cookies */ +static inline +u64 nfs_transform_cookie64(u64 cookie) +{ + return (cookie & 0x80000000) ? (cookie ^ 0xFFFFFFFF00000000) : cookie; +} + /* * Encode arguments to readdir call */ @@ -533,7 +540,7 @@ int buflen, replen; p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_hyper(p, args->cookie); + p = xdr_encode_hyper(p, nfs_transform_cookie64(args->cookie)); *p++ = args->verf[0]; *p++ = args->verf[1]; if (args->plus) { @@ -635,6 +642,7 @@ nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus) { struct nfs_entry old = *entry; + u64 cookie; if (!*p++) { if (!*p) @@ -648,24 +656,25 @@ entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); entry->prev_cookie = entry->cookie; - p = xdr_decode_hyper(p, &entry->cookie); + p = xdr_decode_hyper(p, &cookie); + entry->cookie = nfs_transform_cookie64(cookie); if (plus) { - p = xdr_decode_post_op_attr(p, &entry->fattr); + entry->fattr->valid = 0; + p = xdr_decode_post_op_attr(p, entry->fattr); + if (entry->fattr->valid != 0) + entry->fattr->valid |= NFS_ATTR_RDPLUS; /* In fact, a post_op_fh3: */ if (*p++) { - p = xdr_decode_fhandle(p, &entry->fh); + p = xdr_decode_fhandle(p, entry->fh); /* Ugh -- server reply was truncated */ if (p == NULL) { dprintk("NFS: FH truncated\n"); *entry = old; return ERR_PTR(-EAGAIN); } - } else { - /* If we don't get a file handle, the attrs - * aren't worth a lot. */ - entry->fattr.valid = 0; - } + } else + memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); } entry->eof = !p[0] && p[1]; diff -u --recursive --new-file linux-2.4.10/fs/nfs/read.c linux-2.4.10-tune/fs/nfs/read.c --- linux-2.4.10/fs/nfs/read.c Wed Jun 27 23:02:29 2001 +++ linux-2.4.10-tune/fs/nfs/read.c Mon Sep 24 00:35:25 2001 @@ -59,7 +59,7 @@ static __inline__ struct nfs_read_data *nfs_readdata_alloc(void) { struct nfs_read_data *p; - p = kmem_cache_alloc(nfs_rdata_cachep, SLAB_NFS); + p = kmem_cache_alloc(nfs_rdata_cachep, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); diff -u --recursive --new-file linux-2.4.10/fs/nfs/write.c linux-2.4.10-tune/fs/nfs/write.c --- linux-2.4.10/fs/nfs/write.c Thu Aug 16 18:39:37 2001 +++ linux-2.4.10-tune/fs/nfs/write.c Mon Sep 24 00:35:52 2001 @@ -109,7 +109,7 @@ static __inline__ struct nfs_page *nfs_page_alloc(void) { struct nfs_page *p; - p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL); + p = kmem_cache_alloc(nfs_page_cachep, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->wb_hash); @@ -127,7 +127,7 @@ static __inline__ struct nfs_write_data *nfs_writedata_alloc(void) { struct nfs_write_data *p; - p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NFS); + p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); @@ -288,7 +288,7 @@ goto out; do_it: lock_kernel(); - if (NFS_SERVER(inode)->rsize >= PAGE_CACHE_SIZE) { + if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode)) { err = nfs_writepage_async(NULL, inode, page, 0, offset); if (err >= 0) err = 0; @@ -1031,7 +1031,7 @@ * If wsize is smaller than page size, update and write * page synchronously. */ - if (NFS_SERVER(inode)->wsize < PAGE_SIZE) + if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE || IS_SYNC(inode)) return nfs_writepage_sync(file, inode, page, offset, count); /* diff -u --recursive --new-file linux-2.4.10/include/linux/dcache.h linux-2.4.10-tune/include/linux/dcache.h --- linux-2.4.10/include/linux/dcache.h Wed Aug 15 23:21:11 2001 +++ linux-2.4.10-tune/include/linux/dcache.h Mon Sep 24 00:44:11 2001 @@ -80,6 +80,7 @@ struct super_block * d_sb; /* The root of the dentry tree */ unsigned long d_vfs_flags; void * d_fsdata; /* fs-specific data */ + unsigned long long d_verifier; /* used by nfs d_revalidate */ unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ }; diff -u --recursive --new-file linux-2.4.10/include/linux/fs.h linux-2.4.10-tune/include/linux/fs.h --- linux-2.4.10/include/linux/fs.h Mon Sep 24 00:33:29 2001 +++ linux-2.4.10-tune/include/linux/fs.h Wed Sep 26 15:35:37 2001 @@ -837,6 +837,7 @@ int (*revalidate) (struct dentry *); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct dentry *, struct iattr *); + int (*check_stale) (struct inode *); }; /* diff -u --recursive --new-file linux-2.4.10/include/linux/lockd/lockd.h linux-2.4.10-tune/include/linux/lockd/lockd.h --- linux-2.4.10/include/linux/lockd/lockd.h Mon Sep 24 20:25:26 2001 +++ linux-2.4.10-tune/include/linux/lockd/lockd.h Thu Sep 27 01:38:36 2001 @@ -47,6 +47,7 @@ unsigned short h_authflavor; /* RPC authentication type */ unsigned short h_reclaiming : 1, h_inuse : 1, + h_killed : 1, h_monitored : 1; wait_queue_head_t h_gracewait; /* wait while reclaiming */ u32 h_state; /* pseudo-state counter */ @@ -120,7 +121,7 @@ #ifdef CONFIG_LOCKD_V4 extern struct svc_procedure nlmsvc_procedures4[]; #endif -extern unsigned long nlmsvc_grace_period; +extern int nlmsvc_grace_period; extern unsigned long nlmsvc_timeout; /* diff -u --recursive --new-file linux-2.4.10/include/linux/lockd/nlm.h linux-2.4.10-tune/include/linux/lockd/nlm.h --- linux-2.4.10/include/linux/lockd/nlm.h Mon Aug 13 21:28:01 2001 +++ linux-2.4.10-tune/include/linux/lockd/nlm.h Mon Sep 24 00:51:01 2001 @@ -49,10 +49,10 @@ #define NLMPROC_CANCEL_RES 13 #define NLMPROC_UNLOCK_RES 14 #define NLMPROC_GRANTED_RES 15 +#define NLMPROC_NSM_NOTIFY 16 /* statd callback */ #define NLMPROC_SHARE 20 #define NLMPROC_UNSHARE 21 #define NLMPROC_NM_LOCK 22 #define NLMPROC_FREE_ALL 23 -#define NLMPROC_NSM_NOTIFY 24 /* statd callback */ #endif /* LINUX_LOCKD_NLM_H */ diff -u --recursive --new-file linux-2.4.10/include/linux/nfs_flushd.h linux-2.4.10-tune/include/linux/nfs_flushd.h --- linux-2.4.10/include/linux/nfs_flushd.h Wed Aug 15 23:24:35 2001 +++ linux-2.4.10-tune/include/linux/nfs_flushd.h Mon Sep 24 00:51:36 2001 @@ -13,8 +13,8 @@ * flushing out requests. If it exceeds the hard limit, we stall until * it drops again. */ -#define MAX_REQUEST_SOFT 192 -#define MAX_REQUEST_HARD 256 +#define MAX_REQUEST_SOFT 8192 +#define MAX_REQUEST_HARD 32768 /* * Maximum number of requests per write cluster. diff -u --recursive --new-file linux-2.4.10/include/linux/nfs_fs.h linux-2.4.10-tune/include/linux/nfs_fs.h --- linux-2.4.10/include/linux/nfs_fs.h Thu Aug 16 18:39:37 2001 +++ linux-2.4.10-tune/include/linux/nfs_fs.h Wed Sep 26 15:36:13 2001 @@ -46,10 +46,10 @@ * The upper limit on timeouts for the exponential backoff algorithm. */ #define NFS_MAX_RPC_TIMEOUT (6*HZ) -#define NFS_READ_DELAY (2*HZ) -#define NFS_WRITEBACK_DELAY (5*HZ) +#define NFS_READ_DELAY (60*HZ) +#define NFS_WRITEBACK_DELAY (60*HZ) #define NFS_WRITEBACK_LOCKDELAY (60*HZ) -#define NFS_COMMIT_DELAY (5*HZ) +#define NFS_COMMIT_DELAY (60*HZ) /* * Size of the lookup cache in units of number of entries cached. @@ -101,8 +101,15 @@ #define NFS_FILEID(inode) ((inode)->u.nfs_i.fileid) #define NFS_FSID(inode) ((inode)->u.nfs_i.fsid) -/* Inode Flags */ -#define NFS_USE_READDIRPLUS(inode) ((NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS) ? 1 : 0) +static inline int nfs_server_capable(struct inode *inode, int cap) +{ + return NFS_SERVER(inode)->caps & cap; +} + +static inline int NFS_USE_READDIRPLUS(struct inode *inode) +{ + return NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS; +} /* * These are the default flags for swap requests @@ -152,6 +159,7 @@ extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); +extern int nfs_check_stale(struct inode *); extern int nfs_notify_change(struct dentry *, struct iattr *); /* diff -u --recursive --new-file linux-2.4.10/include/linux/nfs_fs_i.h linux-2.4.10-tune/include/linux/nfs_fs_i.h --- linux-2.4.10/include/linux/nfs_fs_i.h Mon Sep 24 00:44:12 2001 +++ linux-2.4.10-tune/include/linux/nfs_fs_i.h Wed Sep 26 15:16:25 2001 @@ -99,5 +99,6 @@ * Lock flag values */ #define NFS_LCK_GRANTED 0x0001 /* lock has been granted */ +#define NFS_LCK_RECLAIM 0x0002 /* lock marked for reclaiming */ #endif diff -u --recursive --new-file linux-2.4.10/include/linux/nfs_fs_sb.h linux-2.4.10-tune/include/linux/nfs_fs_sb.h --- linux-2.4.10/include/linux/nfs_fs_sb.h Wed Apr 26 02:28:56 2000 +++ linux-2.4.10-tune/include/linux/nfs_fs_sb.h Mon Sep 24 00:36:54 2001 @@ -8,6 +8,7 @@ struct rpc_clnt * client; /* RPC client handle */ struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ int flags; /* various flags */ + unsigned int caps; /* server capabilities */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ @@ -29,5 +30,9 @@ struct nfs_sb_info { struct nfs_server s_server; }; + +/* Server capabilities */ +#define NFS_CAP_READDIRPLUS 1 + #endif diff -u --recursive --new-file linux-2.4.10/include/linux/nfs_xdr.h linux-2.4.10-tune/include/linux/nfs_xdr.h --- linux-2.4.10/include/linux/nfs_xdr.h Mon Jan 29 21:07:43 2001 +++ linux-2.4.10-tune/include/linux/nfs_xdr.h Mon Sep 24 00:36:54 2001 @@ -35,6 +35,7 @@ #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ #define NFS_ATTR_FATTR 0x0002 /* post-op attributes */ #define NFS_ATTR_FATTR_V3 0x0004 /* NFSv3 attributes */ +#define NFS_ATTR_RDPLUS 0x0008 /* Made in readdirplus */ /* * Info on the file system @@ -112,8 +113,8 @@ const char * name; unsigned int len; int eof; - struct nfs_fh fh; - struct nfs_fattr fattr; + struct nfs_fh *fh; + struct nfs_fattr *fattr; }; /* diff -u --recursive --new-file linux-2.4.10/include/linux/sunrpc/clnt.h linux-2.4.10-tune/include/linux/sunrpc/clnt.h --- linux-2.4.10/include/linux/sunrpc/clnt.h Wed Aug 15 23:24:26 2001 +++ linux-2.4.10-tune/include/linux/sunrpc/clnt.h Wed Sep 26 16:24:11 2001 @@ -111,6 +111,8 @@ void rpc_release_client(struct rpc_clnt *); void rpc_getport(struct rpc_task *, struct rpc_clnt *); int rpc_register(u32, u32, int, unsigned short, int *); +u32 * rpc_call_header(struct rpc_task *task); +u32 * rpc_call_verify(struct rpc_task *task); void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); @@ -143,6 +145,11 @@ * Helper function for NFSroot support */ int rpc_getport_external(struct sockaddr_in *, __u32, __u32, int); + +/* + * Ping function + */ +void rpc_ping(struct rpc_task *task); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff -u --recursive --new-file linux-2.4.10/include/linux/sunrpc/xprt.h linux-2.4.10-tune/include/linux/sunrpc/xprt.h --- linux-2.4.10/include/linux/sunrpc/xprt.h Mon Sep 24 00:33:30 2001 +++ linux-2.4.10-tune/include/linux/sunrpc/xprt.h Mon Sep 24 00:51:01 2001 @@ -39,12 +39,14 @@ * Come Linux 2.3, we'll handle fragments directly. */ #define RPC_MAXCONG 16 -#define RPC_MAXREQS (RPC_MAXCONG + 1) +#define RPC_MAXREQS (RPC_MAXCONG + 2) #define RPC_CWNDSCALE 256 #define RPC_MAXCWND (RPC_MAXCONG * RPC_CWNDSCALE) #define RPC_INITCWND RPC_CWNDSCALE #define RPCXPRT_CONGESTED(xprt) \ ((xprt)->cong >= (xprt)->cwnd) +#define RPCXPRT_SUPERCONGESTED(xprt) \ + ((xprt)->cwnd < 2*RPC_CWNDSCALE) /* Default timeout values */ #define RPC_MAX_UDP_TIMEOUT (60*HZ) @@ -135,6 +137,7 @@ struct rpc_wait_queue sending; /* requests waiting to send */ struct rpc_wait_queue pending; /* requests in flight */ struct rpc_wait_queue backlog; /* waiting for slot */ + struct rpc_wait_queue pingwait; /* waiting on ping() */ struct rpc_rqst * free; /* free slots */ struct rpc_rqst slot[RPC_MAXREQS]; unsigned long sockstate; /* Socket state */ @@ -179,10 +182,12 @@ unsigned long); int xprt_reserve(struct rpc_task *); +int xprt_ping_reserve(struct rpc_task *); void xprt_transmit(struct rpc_task *); void xprt_receive(struct rpc_task *); int xprt_adjust_timeout(struct rpc_timeout *); void xprt_release(struct rpc_task *); +void xprt_ping_release(struct rpc_task *); void xprt_reconnect(struct rpc_task *); int xprt_clear_backlog(struct rpc_xprt *); int xprt_tcp_pending(void); @@ -190,6 +195,8 @@ #define XPRT_WSPACE 0 #define XPRT_CONNECT 1 +#define XPRT_PING 2 +#define XPRT_NORESPOND 3 #define xprt_wspace(xp) (test_bit(XPRT_WSPACE, &(xp)->sockstate)) #define xprt_test_and_set_wspace(xp) (test_and_set_bit(XPRT_WSPACE, &(xp)->sockstate)) @@ -199,6 +206,32 @@ #define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_test_and_set_connected(xp) (test_and_set_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_clear_connected(xp) (clear_bit(XPRT_CONNECT, &(xp)->sockstate)) + +static inline int xprt_pinging(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_PING, &xprt->sockstate); +} +static inline int xprt_test_and_set_pinging(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_PING, &xprt->sockstate); +} +static inline void xprt_clear_pinging(struct rpc_xprt *xprt) +{ + clear_bit(XPRT_PING, &xprt->sockstate); +} + +static inline int xprt_norespond(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_NORESPOND, &xprt->sockstate); +} +static inline int xprt_test_and_set_norespond(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_NORESPOND, &xprt->sockstate); +} +static inline void xprt_clear_norespond(struct rpc_xprt *xprt) +{ + clear_bit(XPRT_NORESPOND, &xprt->sockstate); +} static inline void rpciod_tcp_dispatcher(void) diff -u --recursive --new-file linux-2.4.10/net/sunrpc/Makefile linux-2.4.10-tune/net/sunrpc/Makefile --- linux-2.4.10/net/sunrpc/Makefile Fri Dec 29 23:07:24 2000 +++ linux-2.4.10-tune/net/sunrpc/Makefile Mon Sep 24 00:38:01 2001 @@ -14,7 +14,7 @@ obj-y := clnt.o xprt.o sched.o \ auth.o auth_null.o auth_unix.o \ svc.o svcsock.o svcauth.o \ - pmap_clnt.o xdr.o sunrpc_syms.o + ping.o pmap_clnt.o xdr.o sunrpc_syms.o obj-$(CONFIG_PROC_FS) += stats.o obj-$(CONFIG_SYSCTL) += sysctl.o diff -u --recursive --new-file linux-2.4.10/net/sunrpc/clnt.c linux-2.4.10-tune/net/sunrpc/clnt.c --- linux-2.4.10/net/sunrpc/clnt.c Mon Sep 24 00:33:35 2001 +++ linux-2.4.10-tune/net/sunrpc/clnt.c Mon Sep 24 00:38:01 2001 @@ -57,8 +57,8 @@ static void call_reconnect(struct rpc_task *task); static void child_reconnect(struct rpc_task *); static void child_reconnect_status(struct rpc_task *); -static u32 * call_header(struct rpc_task *task); -static u32 * call_verify(struct rpc_task *task); +static void call_ping(struct rpc_task *task); +static void call_pingresult(struct rpc_task *task); /* @@ -491,7 +491,7 @@ /* Encode header and provided arguments */ encode = rpcproc_encode(clnt, task->tk_msg.rpc_proc); - if (!(p = call_header(task))) { + if (!(p = rpc_call_header(task))) { printk(KERN_INFO "RPC: call_header failed, exit EIO\n"); rpc_exit(task, -EIO); } else @@ -618,11 +618,10 @@ task->tk_action = call_reconnect; break; } - /* - * Sleep and dream of an open connection - */ - task->tk_timeout = 5 * HZ; - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + if (RPCXPRT_SUPERCONGESTED(clnt->cl_xprt)) { + task->tk_action = call_ping; + break; + } case -ENOMEM: case -EAGAIN: task->tk_action = call_transmit; @@ -646,6 +645,7 @@ { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; + int major = 0; if (req) { struct rpc_timeout *to = &req->rq_timeout; @@ -666,17 +666,7 @@ rpc_exit(task, -EIO); return; } - if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) { - task->tk_flags |= RPC_CALL_MAJORSEEN; - if (req) - printk(KERN_NOTICE "%s: server %s not responding, still trying\n", - clnt->cl_protname, clnt->cl_server); -#ifdef RPC_DEBUG - else - printk(KERN_NOTICE "%s: task %d can't get a request slot\n", - clnt->cl_protname, task->tk_pid); -#endif - } + major = 1; if (clnt->cl_autobind) clnt->cl_port = 0; @@ -689,6 +679,8 @@ } else if (!xprt_connected(clnt->cl_xprt)) { task->tk_action = call_reconnect; clnt->cl_stats->rpcretrans++; + } else if (major && RPCXPRT_SUPERCONGESTED(clnt->cl_xprt)) { + task->tk_action = call_ping; } else { task->tk_action = call_transmit; clnt->cl_stats->rpcretrans++; @@ -710,12 +702,6 @@ dprintk("RPC: %4d call_decode (status %d)\n", task->tk_pid, task->tk_status); - if (clnt->cl_chatty && (task->tk_flags & RPC_CALL_MAJORSEEN)) { - printk(KERN_NOTICE "%s: server %s OK\n", - clnt->cl_protname, clnt->cl_server); - task->tk_flags &= ~RPC_CALL_MAJORSEEN; - } - if (task->tk_status < 12) { if (!clnt->cl_softrtry) { task->tk_action = call_transmit; @@ -729,7 +715,7 @@ } /* Verify the RPC header */ - if (!(p = call_verify(task))) + if (!(p = rpc_call_verify(task))) return; /* @@ -788,8 +774,8 @@ /* * Call header serialization */ -static u32 * -call_header(struct rpc_task *task) +u32 * +rpc_call_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_xprt *xprt = clnt->cl_xprt; @@ -809,10 +795,63 @@ } /* + * Ping a non-responding server + */ +static void +call_ping(struct rpc_task *task) +{ + task->tk_action = call_pingresult; + rpc_ping(task); +} + +/* + * Interpret the result from ping + */ +static void +call_pingresult(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = clnt->cl_xprt; + int status = task->tk_status; + + task->tk_status = 0; + if (status >= 0) { + task->tk_action = call_transmit; + return; + } + + switch(status) { + case -ECONNREFUSED: + case -ENOTCONN: + if (clnt->cl_autobind || !clnt->cl_port) { + clnt->cl_port = 0; + task->tk_action = call_bind; + break; + } + if (xprt->stream) { + task->tk_action = call_reconnect; + break; + } + case -ENOMEM: + case -ENOBUFS: + rpc_delay(task, HZ >> 4); + case -ETIMEDOUT: + task->tk_action = call_ping; + break; + default: + if (clnt->cl_chatty) + printk("%s: RPC call returned error %d\n", + clnt->cl_protname, -status); + rpc_exit(task,status); + return; + } +} + +/* * Reply header verification */ -static u32 * -call_verify(struct rpc_task *task) +u32 * +rpc_call_verify(struct rpc_task *task) { u32 *p = task->tk_rqstp->rq_rvec[0].iov_base, n; diff -u --recursive --new-file linux-2.4.10/net/sunrpc/ping.c linux-2.4.10-tune/net/sunrpc/ping.c --- linux-2.4.10/net/sunrpc/ping.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.10-tune/net/sunrpc/ping.c Mon Sep 24 00:38:01 2001 @@ -0,0 +1,218 @@ +/* + * linux/net/sunrpc/ping.c + * + * Ping routing. + * + * Copyright (C) 2000, Trond Myklebust + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define RPC_SLACK_SPACE 512 /* total overkill */ +#define RPC_PING_DELAY (15*HZ) + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_XPRT +#endif + +static void ping_call_reserve(struct rpc_task *); +static void ping_call_allocate(struct rpc_task *); +static void ping_call_encode(struct rpc_task *); +static void ping_call_transmit(struct rpc_task *); +static void ping_call_receive(struct rpc_task *); +static void ping_call_exit(struct rpc_task *); + + +static void +ping_call_reserve(struct rpc_task *task) +{ + dprintk("RPC: %4d, ping_call_reserve\n", task->tk_pid); + task->tk_status = 0; + task->tk_action = ping_call_allocate; + task->tk_timeout = task->tk_client->cl_timeout.to_resrvval; + xprt_ping_reserve(task); +} + +static void +ping_call_allocate(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct rpc_rqst *req = task->tk_rqstp; + unsigned int bufsiz; + + dprintk("RPC: %4d, ping_call_allocate (status %d)\n", + task->tk_pid, task->tk_status); + + task->tk_action = ping_call_exit; + if (task->tk_status < 0) + return; + + bufsiz = rpcproc_bufsiz(clnt, task->tk_msg.rpc_proc) + RPC_SLACK_SPACE; + if (!(task->tk_buffer = rpc_malloc(task, bufsiz << 1))) { + task->tk_status = -ENOMEM; + return; + } + req->rq_svec[0].iov_base = (void *)task->tk_buffer; + req->rq_svec[0].iov_len = bufsiz; + req->rq_slen = 0; + req->rq_snr = 1; + req->rq_rvec[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); + req->rq_rvec[0].iov_len = bufsiz; + req->rq_rlen = bufsiz; + req->rq_rnr = 1; + task->tk_action = ping_call_encode; +} + +static void +ping_call_encode(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + u32 *p; + + dprintk("RPC: %4d, ping_call_encode (status %d)\n", + task->tk_pid, task->tk_status); + + if (task->tk_status < 0) { + task->tk_action = ping_call_exit; + return; + } + p = rpc_call_header(task); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + task->tk_action = ping_call_transmit; +} + +static void +ping_call_transmit(struct rpc_task *task) +{ + dprintk("RPC: %4d, ping_call_transmit\n", task->tk_pid); + task->tk_action = ping_call_receive; + xprt_transmit(task); +} + +static void +ping_call_receive(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_timeout *to = &req->rq_timeout; + u32 *p; + + dprintk("RPC: %4d, ping_call_receive (status %d)\n", + task->tk_pid, task->tk_status); + + if (task->tk_status >= 0) + p = rpc_call_verify(task); + + task->tk_action = ping_call_exit; + + if (task->tk_status >= 0 || task->tk_status == -EACCES) { + task->tk_status = 0; + if (xprt_norespond(xprt)) { + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s OK\n", + clnt->cl_protname, clnt->cl_server); + xprt_clear_norespond(xprt); + } + return; + } + + switch (task->tk_status) { + case -ENOTCONN: + break; + case -ENOMEM: + case -EAGAIN: + case -ECONNREFUSED: + case -ETIMEDOUT: + if (!xprt_adjust_timeout(to)) { + task->tk_status = 0; + task->tk_action = ping_call_transmit; + break; + } + default: + if (clnt->cl_softrtry) { + task->tk_status = -EIO; + break; + } + if (clnt->cl_chatty) { + if (!xprt_test_and_set_norespond(xprt)) { + printk(KERN_NOTICE + "%s: server %s is not responding\n", + clnt->cl_protname, clnt->cl_server); + } else { + printk(KERN_NOTICE + "%s: server %s still not responding\n", + clnt->cl_protname, clnt->cl_server); + } + } + rpc_delay(task, RPC_PING_DELAY); + } +} + +static void +ping_call_exit(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + + dprintk("RPC: %4d, ping_call_exit (status %d)\n", + task->tk_pid, task->tk_status); + + task->tk_action = NULL; + xprt_ping_release(task); + + /* Sigh. rpc_delay() clears task->tk_status */ + if (task->tk_status == 0 && xprt_norespond(xprt)) + task->tk_status = -ETIMEDOUT; + + xprt_clear_pinging(xprt); + rpc_wake_up_status(&xprt->pingwait, task->tk_status); +} + +void +rpc_ping(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_task *child; + struct rpc_message msg = {0, NULL, NULL, NULL}; + + dprintk("RPC: %4d, rpc_ping\n", task->tk_pid); + + again: + if (xprt_test_and_set_pinging(xprt)) { + rpc_sleep_on(&xprt->pingwait, task, NULL, 0); + if (!xprt_pinging(xprt)) { + rpc_wake_up_task(task); + goto again; + } + dprintk("RPC: %4d, rpc_ping, waiting on completion\n", + task->tk_pid); + return; + } + + child = rpc_new_child(clnt, task); + if (!child) { + dprintk("RPC: %4d, rpc_ping, failed to create child process\n", + task->tk_pid); + xprt_clear_pinging(xprt); + rpc_wake_up_status(&xprt->pingwait, -ENOMEM); + task->tk_status = -ENOMEM; + return; + } + rpc_call_setup(child, &msg, 0); + child->tk_action = ping_call_reserve; + + dprintk("RPC: %4d, rpc_ping, running child process %4d\n", + task->tk_pid, child->tk_pid); + rpc_run_child(task, child, NULL); +} diff -u --recursive --new-file linux-2.4.10/net/sunrpc/sched.c linux-2.4.10-tune/net/sunrpc/sched.c --- linux-2.4.10/net/sunrpc/sched.c Mon Sep 24 00:33:35 2001 +++ linux-2.4.10-tune/net/sunrpc/sched.c Mon Sep 24 00:37:40 2001 @@ -30,7 +30,7 @@ /* * We give RPC the same get_free_pages priority as NFS */ -#define GFP_RPC GFP_NFS +#define GFP_RPC GFP_NOFS static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); @@ -744,7 +744,7 @@ * for readahead): * * sync user requests: GFP_KERNEL - * async requests: GFP_RPC (== GFP_NFS) + * async requests: GFP_RPC (== GFP_NOFS) * swap requests: GFP_ATOMIC (or new GFP_SWAPPER) */ void * @@ -772,8 +772,8 @@ } if (flags & RPC_TASK_ASYNC) return NULL; - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ>>4); + current->policy |= SCHED_YIELD; + schedule(); } while (!signalled()); return NULL; @@ -1068,8 +1068,6 @@ strcpy(current->comm, "rpciod"); - current->flags |= PF_MEMALLOC; - dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid); while (rpciod_users) { if (signalled()) { @@ -1116,8 +1114,8 @@ __rpc_schedule(); if (all_tasks) { dprintk("rpciod_killall: waiting for tasks to exit\n"); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); + current->policy |= SCHED_YIELD; + schedule(); } } @@ -1187,8 +1185,8 @@ * wait briefly before checking the process id. */ current->sigpending = 0; - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); + current->policy |= SCHED_YIELD; + schedule(); /* * Display a message if we're going to wait longer. */ diff -u --recursive --new-file linux-2.4.10/net/sunrpc/xprt.c linux-2.4.10-tune/net/sunrpc/xprt.c --- linux-2.4.10/net/sunrpc/xprt.c Mon Sep 24 00:33:35 2001 +++ linux-2.4.10-tune/net/sunrpc/xprt.c Mon Sep 24 00:38:01 2001 @@ -85,7 +85,7 @@ */ static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static void do_xprt_transmit(struct rpc_task *); -static void xprt_reserve_status(struct rpc_task *task); +static void xprt_alloc_slot(struct rpc_xprt *, struct rpc_task *); static void xprt_disconnect(struct rpc_xprt *); static void xprt_reconn_status(struct rpc_task *task); static struct socket *xprt_create_socket(int, struct rpc_timeout *); @@ -1247,15 +1247,8 @@ rpc_sleep_on(&xprt->sending, task, NULL, NULL); } spin_unlock_bh(&xprt->sock_lock); - return; case -EAGAIN: - /* Keep holding the socket if it is blocked */ - rpc_delay(task, HZ>>4); return; - case -ECONNREFUSED: - case -ENOTCONN: - if (!xprt->stream) - return; default: if (xprt->stream) xprt_disconnect(xprt); @@ -1306,9 +1299,11 @@ dprintk("RPC: %4d xprt_reserve cong = %ld cwnd = %ld\n", task->tk_pid, xprt->cong, xprt->cwnd); spin_lock_bh(&xprt->xprt_lock); - xprt_reserve_status(task); + if (!RPCXPRT_CONGESTED(xprt)) + xprt_alloc_slot(xprt, task); if (task->tk_rqstp) { task->tk_timeout = 0; + xprt->cong += RPC_CWNDSCALE; } else if (!task->tk_timeout) { task->tk_status = -ENOBUFS; } else { @@ -1323,35 +1318,48 @@ } /* - * Reservation callback + * Reserve a ping RPC call slot. */ -static void -xprt_reserve_status(struct rpc_task *task) +int +xprt_ping_reserve(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - struct rpc_rqst *req; - if (xprt->shutdown) { - task->tk_status = -EIO; - } else if (task->tk_status < 0) { - /* NOP */ - } else if (task->tk_rqstp) { - /* We've already been given a request slot: NOP */ - } else { - if (RPCXPRT_CONGESTED(xprt) || !(req = xprt->free)) - goto out_nofree; - /* OK: There's room for us. Grab a free slot and bump - * congestion value */ - xprt->free = req->rq_next; - req->rq_next = NULL; - xprt->cong += RPC_CWNDSCALE; - task->tk_rqstp = req; - xprt_request_init(task, xprt); + /* We already have an initialized request. */ + if (task->tk_rqstp) + return 0; - if (xprt->free) - xprt_clear_backlog(xprt); - } + dprintk("RPC: %4d xprt_ping_reserve cong = %ld cwnd = %ld\n", + task->tk_pid, xprt->cong, xprt->cwnd); + spin_lock_bh(&xprt->xprt_lock); + xprt_alloc_slot(xprt, task); + if (!task->tk_rqstp) + task->tk_status = -ENOBUFS; + spin_unlock_bh(&xprt->xprt_lock); + dprintk("RPC: %4d xprt_ping_reserve returns %d\n", + task->tk_pid, task->tk_status); + return task->tk_status; +} +/* + * Reserve a slot + */ +static void +xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) +{ + struct rpc_rqst *req; + + if (!(req = xprt->free)) + goto out_nofree; + /* OK: There's room for us. Grab a free slot and bump + * congestion value */ + xprt->free = req->rq_next; + req->rq_next = NULL; + task->tk_rqstp = req; + xprt_request_init(task, xprt); + + if (xprt->free) + xprt_clear_backlog(xprt); return; out_nofree: @@ -1383,8 +1391,8 @@ /* * Release an RPC call slot */ -void -xprt_release(struct rpc_task *task) +static void +__xprt_release(struct rpc_task *task, int congvalue) { struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req; @@ -1411,13 +1419,26 @@ req->rq_next = xprt->free; xprt->free = req; - /* Decrease congestion value. */ - xprt->cong -= RPC_CWNDSCALE; - - xprt_clear_backlog(xprt); + if (congvalue) { + /* Decrease congestion value. */ + xprt->cong -= congvalue; + xprt_clear_backlog(xprt); + } spin_unlock_bh(&xprt->xprt_lock); } +void +xprt_release(struct rpc_task *task) +{ + __xprt_release(task, RPC_CWNDSCALE); +} + +void +xprt_ping_release(struct rpc_task *task) +{ + __xprt_release(task, 0); +} + /* * Set default timeout parameters */ @@ -1487,6 +1508,7 @@ xprt->pending = RPC_INIT_WAITQ("xprt_pending"); xprt->sending = RPC_INIT_WAITQ("xprt_sending"); xprt->backlog = RPC_INIT_WAITQ("xprt_backlog"); + xprt->pingwait= RPC_INIT_WAITQ("xprt_pingwait"); /* initialize free list */ for (i = 0, req = xprt->slot; i < RPC_MAXREQS-1; i++, req++) @@ -1622,6 +1644,7 @@ rpc_wake_up(&xprt->sending); rpc_wake_up(&xprt->pending); rpc_wake_up(&xprt->backlog); + rpc_wake_up(&xprt->pingwait); if (waitqueue_active(&xprt->cong_wait)) wake_up(&xprt->cong_wait); }