diff -u --recursive --new-file linux-2.4.19/arch/i386/kernel/i386_ksyms.c linux-2.4.19-17-rpcbuf/arch/i386/kernel/i386_ksyms.c
--- linux-2.4.19/arch/i386/kernel/i386_ksyms.c Sat Aug 3 02:39:42 2002
+++ linux-2.4.19-17-rpcbuf/arch/i386/kernel/i386_ksyms.c Sat Oct 5 03:50:54 2002
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -173,6 +174,11 @@
 EXPORT_SYMBOL(atomic_dec_and_lock);
 #endif
 
+#ifdef CONFIG_HIGHMEM
+EXPORT_SYMBOL(kmap_prot);
+EXPORT_SYMBOL(kmap_pte);
+#endif
+
 extern int is_sony_vaio_laptop;
 EXPORT_SYMBOL(is_sony_vaio_laptop);
 
diff -u --recursive --new-file linux-2.4.19/arch/mips/kernel/mips_ksyms.c linux-2.4.19-17-rpcbuf/arch/mips/kernel/mips_ksyms.c
--- linux-2.4.19/arch/mips/kernel/mips_ksyms.c Sat Aug 3 02:39:43 2002
+++ linux-2.4.19-17-rpcbuf/arch/mips/kernel/mips_ksyms.c Sat Oct 5 03:50:54 2002
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -127,3 +128,8 @@
 #endif
 
 EXPORT_SYMBOL(get_wchan);
+
+#ifdef CONFIG_HIGHMEM
+EXPORT_SYMBOL(kmap_prot);
+EXPORT_SYMBOL(kmap_pte);
+#endif
diff -u --recursive --new-file linux-2.4.19/arch/ppc/kernel/ppc_ksyms.c linux-2.4.19-17-rpcbuf/arch/ppc/kernel/ppc_ksyms.c
--- linux-2.4.19/arch/ppc/kernel/ppc_ksyms.c Sat Aug 3 02:39:43 2002
+++ linux-2.4.19-17-rpcbuf/arch/ppc/kernel/ppc_ksyms.c Sat Oct 5 03:50:55 2002
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -364,3 +365,8 @@
 EXPORT_SYMBOL_NOVERS(agp_special_page);
 #endif /* defined(CONFIG_ALL_PPC) */
 
+#ifdef CONFIG_HIGHMEM
+EXPORT_SYMBOL(kmap_prot);
+EXPORT_SYMBOL(kmap_pte);
+#endif
+
diff -u --recursive --new-file linux-2.4.19/arch/sparc/kernel/sparc_ksyms.c linux-2.4.19-17-rpcbuf/arch/sparc/kernel/sparc_ksyms.c
--- linux-2.4.19/arch/sparc/kernel/sparc_ksyms.c Sat Aug 3 02:39:43 2002
+++ linux-2.4.19-17-rpcbuf/arch/sparc/kernel/sparc_ksyms.c Sat Oct 5 03:50:55 2002
@@ -23,6 +23,7 @@
 #include
 #endif
 #include
+#include
 #include
 #include
 
@@ -300,3 +301,8 @@
 
 /* Sun Power Management Idle Handler */
 EXPORT_SYMBOL(pm_idle);
+
+#ifdef CONFIG_HIGHMEM
+EXPORT_SYMBOL(kmap_prot);
+EXPORT_SYMBOL(kmap_pte);
+#endif
diff -u --recursive --new-file linux-2.4.19/fs/lockd/xdr.c linux-2.4.19-17-rpcbuf/fs/lockd/xdr.c
--- linux-2.4.19/fs/lockd/xdr.c Mon Oct 1 22:45:47 2001
+++ linux-2.4.19-17-rpcbuf/fs/lockd/xdr.c Sat Oct 5 03:51:45 2002
@@ -561,11 +561,10 @@
 #define nlmclt_decode_norep NULL
 
 #define PROC(proc, argtype, restype) \
- { "nlm_" #proc, \
-   (kxdrproc_t) nlmclt_encode_##argtype, \
-   (kxdrproc_t) nlmclt_decode_##restype, \
-   MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2, \
-   0 \
+ { .p_procname = "nlm_" #proc, \
+   .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \
+   .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \
+   .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2 \
 }
 
 static struct rpc_procinfo nlm_procedures[] = {
diff -u --recursive --new-file linux-2.4.19/fs/lockd/xdr4.c linux-2.4.19-17-rpcbuf/fs/lockd/xdr4.c
--- linux-2.4.19/fs/lockd/xdr4.c Mon Oct 1 22:45:47 2001
+++ linux-2.4.19-17-rpcbuf/fs/lockd/xdr4.c Sat Oct 5 03:51:45 2002
@@ -566,12 +566,11 @@
  */
 #define nlm4clt_decode_norep NULL
 
-#define PROC(proc, argtype, restype) \
- { "nlm4_" #proc, \
-   (kxdrproc_t) nlm4clt_encode_##argtype, \
-   (kxdrproc_t) nlm4clt_decode_##restype, \
-   MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2, \
-   0 \
+#define PROC(proc, argtype, restype) \
+ { .p_procname = "nlm4_" #proc, \
+   .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \
+   .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \
+   .p_bufsiz =
MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2 \ } static struct rpc_procinfo nlm4_procedures[] = { diff -u --recursive --new-file linux-2.4.19/fs/nfs/dir.c linux-2.4.19-17-rpcbuf/fs/nfs/dir.c --- linux-2.4.19/fs/nfs/dir.c Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-17-rpcbuf/fs/nfs/dir.c Sat Oct 5 03:50:55 2002 @@ -99,13 +99,12 @@ struct file *file = desc->file; struct inode *inode = file->f_dentry->d_inode; struct rpc_cred *cred = nfs_file_cred(file); - void *buffer = kmap(page); int error; dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index); again: - error = NFS_PROTO(inode)->readdir(inode, cred, desc->entry->cookie, buffer, + error = NFS_PROTO(inode)->readdir(inode, cred, desc->entry->cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); /* We requested READDIRPLUS, but the server doesn't grok it */ if (desc->plus && error == -ENOTSUPP) { @@ -116,7 +115,6 @@ if (error < 0) goto error; SetPageUptodate(page); - kunmap(page); /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * throught inode->i_sem or some other mechanism. @@ -315,12 +313,12 @@ status = -ENOMEM; goto out; } - desc->page = page; - desc->ptr = kmap(page); desc->error = NFS_PROTO(inode)->readdir(inode, cred, desc->target, - desc->ptr, + page, NFS_SERVER(inode)->dtsize, desc->plus); + desc->page = page; + desc->ptr = kmap(page); if (desc->error >= 0) { if ((status = dir_decode(desc)) == 0) desc->entry->prev_cookie = desc->target; diff -u --recursive --new-file linux-2.4.19/fs/nfs/inode.c linux-2.4.19-17-rpcbuf/fs/nfs/inode.c --- linux-2.4.19/fs/nfs/inode.c Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-17-rpcbuf/fs/nfs/inode.c Sat Oct 5 03:53:49 2002 @@ -472,7 +472,8 @@ goto failure_kill_reqlist; } - /* We're airborne */ + /* We're airborne Set socket buffersize */ + rpc_setbufsize(clnt, server->wsize + 100, server->rsize + 100); /* Check whether to start the lockd process */ if (!(server->flags & NFS_MOUNT_NONLM)) diff -u --recursive --new-file linux-2.4.19/fs/nfs/nfs2xdr.c linux-2.4.19-17-rpcbuf/fs/nfs/nfs2xdr.c --- linux-2.4.19/fs/nfs/nfs2xdr.c Mon Feb 25 20:38:09 2002 +++ linux-2.4.19-17-rpcbuf/fs/nfs/nfs2xdr.c Sat Oct 5 03:51:46 2002 @@ -24,9 +24,6 @@ #include #include -/* Uncomment this to support servers requiring longword lengths */ -#define NFS_PAD_WRITES 1 - #define NFSDBG_FACILITY NFSDBG_XDR /* #define NFS_PARANOIA 1 */ @@ -90,17 +87,6 @@ return p + XDR_QUADLEN(NFS2_FHSIZE); } -static inline u32 * -xdr_decode_string2(u32 *p, char **string, unsigned int *len, - unsigned int maxlen) -{ - *len = ntohl(*p++); - if (*len > maxlen) - return NULL; - *string = (char *) p; - return p + XDR_QUADLEN(*len); -} - static inline u32* xdr_decode_time(u32 *p, u64 *timep) { @@ -109,7 +95,7 @@ return p; } -static inline u32 * +static u32 * xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) { fattr->type = (enum nfs_ftype) ntohl(*p++); @@ -223,35 +209,20 @@ nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args) { struct rpc_auth *auth = req->rq_task->tk_auth; - int buflen, replen; - unsigned int nr; + unsigned int replen; + u32 offset = (u32)args->offset; + u32 count = args->count; p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->offset); - *p++ = htonl(args->count); - *p++ = htonl(args->count); + *p++ = htonl(offset); + *p++ = htonl(count); + *p++ = htonl(count); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - /* Get the number of buffers in the receive iovec */ - nr = 
args->nriov; - - if (nr+2 > MAX_IOVEC) { - printk(KERN_ERR "NFS: Bad number of iov's in xdr_readargs\n"); - return -EINVAL; - } - - /* set up reply iovec */ + /* Inline the page array */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; - buflen = req->rq_rvec[0].iov_len; - req->rq_rvec[0].iov_len = replen; - /* Copy the iovec */ - memcpy(req->rq_rvec + 1, args->iov, nr * sizeof(struct iovec)); - - req->rq_rvec[nr+1].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; - req->rq_rvec[nr+1].iov_len = buflen - replen; - req->rq_rlen = args->count + buflen; - req->rq_rnr += nr+1; - + xdr_inline_pages(&req->rq_rcv_buf, replen, + args->pages, args->pgbase, count); return 0; } @@ -272,10 +243,10 @@ hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len > hdrlen) { dprintk("NFS: READ header is short. iovec will be shifted.\n"); - xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); + xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen); } - recvd = req->rq_rlen - hdrlen; + recvd = req->rq_received - hdrlen; if (count > recvd) { printk(KERN_WARNING "NFS: server cheating in read reply: " "count %d > recvd %d\n", count, recvd); @@ -284,7 +255,6 @@ dprintk("RPC: readres OK count %d\n", count); if (count < res->count) { - xdr_zero_iovec(iov+1, req->rq_rnr-2, res->count - count); res->count = count; res->eof = 1; /* Silly NFSv3ism which can't be helped */ } else @@ -300,46 +270,19 @@ static int nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) { - unsigned int nr; + struct xdr_buf *sndbuf = &req->rq_snd_buf; + u32 offset = (u32)args->offset; u32 count = args->count; p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->offset); - *p++ = htonl(args->offset); + *p++ = htonl(offset); + *p++ = htonl(offset); *p++ = htonl(count); *p++ = htonl(count); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - - /* Get the number of buffers in the send iovec */ - nr = args->nriov; - - if (nr+2 > MAX_IOVEC) { - printk(KERN_ERR "NFS: Bad number of iov's in xdr_writeargs " - "(nr %d max %d)\n", nr, MAX_IOVEC); - return -EINVAL; - } - - /* Copy the iovec */ - memcpy(req->rq_svec + 1, args->iov, nr * sizeof(struct iovec)); - -#ifdef NFS_PAD_WRITES - /* - * Some old servers require that the message length - * be a multiple of 4, so we pad it here if needed. - */ - if (count & 3) { - struct iovec *iov = req->rq_svec + nr + 1; - int pad = 4 - (count & 3); - - iov->iov_base = (void *) "\0\0\0"; - iov->iov_len = pad; - count += pad; - nr++; - } -#endif - req->rq_slen += count; - req->rq_snr += nr; + sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); + /* Copy the page array */ + xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); return 0; } @@ -406,32 +349,24 @@ { struct rpc_task *task = req->rq_task; struct rpc_auth *auth = task->tk_auth; - u32 bufsiz = args->bufsiz; - int buflen, replen; + unsigned int replen; + u32 count = args->count; /* * Some servers (e.g. HP OS 9.5) seem to expect the buffer size * to be in longwords ... check whether to convert the size. 
*/ if (task->tk_client->cl_flags & NFS_CLNTF_BUFSIZE) - bufsiz = bufsiz >> 2; + count = count >> 2; p = xdr_encode_fhandle(p, args->fh); *p++ = htonl(args->cookie); - *p++ = htonl(bufsiz); /* see above */ + *p++ = htonl(count); /* see above */ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - /* set up reply iovec */ + /* Inline the page array */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2; - buflen = req->rq_rvec[0].iov_len; - req->rq_rvec[0].iov_len = replen; - req->rq_rvec[1].iov_base = args->buffer; - req->rq_rvec[1].iov_len = args->bufsiz; - req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; - req->rq_rvec[2].iov_len = buflen - replen; - req->rq_rlen = buflen + args->bufsiz; - req->rq_rnr += 2; - + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); return 0; } @@ -443,12 +378,15 @@ * from nfs_readdir for each entry. */ static int -nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res) +nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, void *dummy) { - struct iovec *iov = req->rq_rvec; - int hdrlen; - int status, nr; - u32 *end, *entry, len; + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct iovec *iov = rcvbuf->head; + struct page **page; + int hdrlen, recvd; + int status, nr; + unsigned int len, pglen; + u32 *end, *entry; if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); @@ -456,15 +394,18 @@ hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len > hdrlen) { dprintk("NFS: READDIR header is short. iovec will be shifted.\n"); - xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); } - - /* Get start and end address of XDR data */ - p = (u32 *) iov[1].iov_base; - end = (u32 *) ((u8 *) p + iov[1].iov_len); + pglen = rcvbuf->page_len; + recvd = req->rq_received - hdrlen; + if (pglen > recvd) + pglen = recvd; + page = rcvbuf->pages; + p = kmap(*page); + end = (u32 *)((char *)p + pglen); + entry = p; for (nr = 0; *p++; nr++) { - entry = p - 1; if (p + 2 > end) goto short_pkt; p++; /* fileid */ @@ -473,16 +414,28 @@ if (len > NFS2_MAXNAMLEN) { printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n", len); - return -errno_NFSERR_IO; + goto err_unmap; } if (p + 2 > end) goto short_pkt; + entry = p; } + if (!nr && (entry[0] != 0 || entry[1] == 0)) + goto short_pkt; + out: + kunmap(*page); return nr; short_pkt: - printk(KERN_NOTICE "NFS: short packet in readdir reply!\n"); entry[0] = entry[1] = 0; - return nr; + /* truncate listing? 
*/ + if (!nr) { + printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); + entry[1] = 1; + } + goto out; +err_unmap: + kunmap(*page); + return -errno_NFSERR_IO; } u32 * @@ -568,21 +521,16 @@ static int nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args) { - struct rpc_task *task = req->rq_task; - struct rpc_auth *auth = task->tk_auth; - int buflen, replen; + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + u32 count = args->count - 4; p = xdr_encode_fhandle(p, args->fh); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2; - buflen = req->rq_rvec[0].iov_len; - req->rq_rvec[0].iov_len = replen; - req->rq_rvec[1].iov_base = args->buffer; - req->rq_rvec[1].iov_len = args->bufsiz; - req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; - req->rq_rvec[2].iov_len = buflen - replen; - req->rq_rlen = buflen + args->bufsiz; - req->rq_rnr += 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); return 0; } @@ -590,32 +538,33 @@ * Decode READLINK reply */ static int -nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_readlinkres *res) +nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, void *dummy) { - struct iovec *iov = req->rq_rvec; - u32 *strlen; + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct iovec *iov = rcvbuf->head; + unsigned int hdrlen; + u32 *strlen, len; char *string; - int hdrlen; int status; - unsigned int len; if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len > hdrlen) { dprintk("NFS: READLINK header is short. iovec will be shifted.\n"); - xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); } - strlen = (u32*)res->buffer; + strlen = (u32*)kmap(rcvbuf->pages[0]); /* Convert length of symlink */ len = ntohl(*strlen); - if (len > res->bufsiz - 5) - len = res->bufsiz - 5; + if (len > rcvbuf->page_len) + len = rcvbuf->page_len; *strlen = len; /* NULL terminate the string we got */ string = (char *)(strlen + 1); string[len] = 0; + kunmap(rcvbuf->pages[0]); return 0; } @@ -732,33 +681,32 @@ # define MAX(a, b) (((a) > (b))? 
(a) : (b)) #endif -#define PROC(proc, argtype, restype) \ - { "nfs_" #proc, \ - (kxdrproc_t) nfs_xdr_##argtype, \ - (kxdrproc_t) nfs_xdr_##restype, \ - MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ - 0 \ +#define PROC(proc, argtype, restype, timer) \ + { .p_procname = "nfs_" #proc, \ + .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ + .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ + .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ + .p_timer = timer \ } - static struct rpc_procinfo nfs_procedures[18] = { - PROC(null, enc_void, dec_void), - PROC(getattr, fhandle, attrstat), - PROC(setattr, sattrargs, attrstat), - PROC(root, enc_void, dec_void), - PROC(lookup, diropargs, diropres), - PROC(readlink, readlinkargs, readlinkres), - PROC(read, readargs, readres), - PROC(writecache, enc_void, dec_void), - PROC(write, writeargs, writeres), - PROC(create, createargs, diropres), - PROC(remove, diropargs, stat), - PROC(rename, renameargs, stat), - PROC(link, linkargs, stat), - PROC(symlink, symlinkargs, stat), - PROC(mkdir, createargs, diropres), - PROC(rmdir, diropargs, stat), - PROC(readdir, readdirargs, readdirres), - PROC(statfs, fhandle, statfsres), + PROC(null, enc_void, dec_void, 0), + PROC(getattr, fhandle, attrstat, 1), + PROC(setattr, sattrargs, attrstat, 0), + PROC(root, enc_void, dec_void, 0), + PROC(lookup, diropargs, diropres, 2), + PROC(readlink, readlinkargs, readlinkres, 3), + PROC(read, readargs, readres, 3), + PROC(writecache, enc_void, dec_void, 0), + PROC(write, writeargs, writeres, 4), + PROC(create, createargs, diropres, 0), + PROC(remove, diropargs, stat, 0), + PROC(rename, renameargs, stat, 0), + PROC(link, linkargs, stat, 0), + PROC(symlink, symlinkargs, stat, 0), + PROC(mkdir, createargs, diropres, 0), + PROC(rmdir, diropargs, stat, 0), + PROC(readdir, readdirargs, readdirres, 3), + PROC(statfs, fhandle, statfsres, 0), }; struct rpc_version nfs_version2 = { diff -u --recursive --new-file linux-2.4.19/fs/nfs/nfs3proc.c linux-2.4.19-17-rpcbuf/fs/nfs/nfs3proc.c --- linux-2.4.19/fs/nfs/nfs3proc.c Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-17-rpcbuf/fs/nfs/nfs3proc.c Sat Oct 5 03:50:55 2002 @@ -151,17 +151,16 @@ } static int -nfs3_proc_readlink(struct inode *inode, void *buffer, unsigned int buflen) +nfs3_proc_readlink(struct inode *inode, struct page *page) { struct nfs_fattr fattr; - struct nfs3_readlinkargs args = { NFS_FH(inode), buffer, buflen }; - struct nfs3_readlinkres res = { &fattr, buffer, buflen }; + struct nfs3_readlinkargs args = { NFS_FH(inode), PAGE_CACHE_SIZE, &page }; int status; dprintk("NFS call readlink\n"); fattr.valid = 0; status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK, - &args, &res, 0); + &args, &fattr, 0); nfs_refresh_inode(inode, &fattr); dprintk("NFS reply readlink: %d\n", status); return status; @@ -170,11 +169,12 @@ static int nfs3_proc_read(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, int flags, - loff_t offset, unsigned int count, void *buffer, int *eofp) + unsigned int base, unsigned int count, struct page *page, + int *eofp) { - struct nfs_readargs arg = { NFS_FH(inode), offset, count, 1, - {{buffer, count}, {0,0}, {0,0}, {0,0}, - {0,0}, {0,0}, {0,0}, {0,0}} }; + u64 offset = page_offset(page) + base; + struct nfs_readargs arg = { NFS_FH(inode), offset, count, + base, &page }; struct nfs_readres res = { fattr, count, 0 }; struct rpc_message msg = { NFS3PROC_READ, &arg, &res, cred }; int status; @@ -190,13 +190,12 @@ static int nfs3_proc_write(struct inode *inode, struct rpc_cred *cred, struct 
nfs_fattr *fattr, int flags, - loff_t offset, unsigned int count, - void *buffer, struct nfs_writeverf *verf) + unsigned int base, unsigned int count, + struct page *page, struct nfs_writeverf *verf) { + u64 offset = page_offset(page) + base; struct nfs_writeargs arg = { NFS_FH(inode), offset, count, - NFS_FILE_SYNC, 1, - {{buffer, count}, {0,0}, {0,0}, {0,0}, - {0,0}, {0,0}, {0,0}, {0,0}} }; + NFS_FILE_SYNC, base, &page }; struct nfs_writeres res = { fattr, verf, 0 }; struct rpc_message msg = { NFS3PROC_WRITE, &arg, &res, cred }; int status, rpcflags = 0; @@ -434,26 +433,16 @@ */ static int nfs3_proc_readdir(struct inode *dir, struct rpc_cred *cred, - u64 cookie, void *entry, - unsigned int size, int plus) + u64 cookie, struct page *page, unsigned int count, int plus) { struct nfs_fattr dir_attr; - struct nfs3_readdirargs arg = { NFS_FH(dir), cookie, {0, 0}, 0, 0, 0 }; - struct nfs3_readdirres res = { &dir_attr, 0, 0, 0, 0 }; - struct rpc_message msg = { NFS3PROC_READDIR, &arg, &res, cred }; u32 *verf = NFS_COOKIEVERF(dir); + struct nfs3_readdirargs arg = { NFS_FH(dir), cookie, {verf[0], verf[1]}, + plus, count, &page }; + struct nfs3_readdirres res = { &dir_attr, verf, plus }; + struct rpc_message msg = { NFS3PROC_READDIR, &arg, &res, cred }; int status; - arg.buffer = entry; - arg.bufsiz = size; - arg.verf[0] = verf[0]; - arg.verf[1] = verf[1]; - arg.plus = plus; - res.buffer = entry; - res.bufsiz = size; - res.verf = verf; - res.plus = plus; - if (plus) msg.rpc_proc = NFS3PROC_READDIRPLUS; diff -u --recursive --new-file linux-2.4.19/fs/nfs/nfs3xdr.c linux-2.4.19-17-rpcbuf/fs/nfs/nfs3xdr.c --- linux-2.4.19/fs/nfs/nfs3xdr.c Sat Nov 3 02:40:09 2001 +++ linux-2.4.19-17-rpcbuf/fs/nfs/nfs3xdr.c Sat Oct 5 03:51:46 2002 @@ -22,9 +22,6 @@ #include #include -/* Uncomment this to support servers requiring longword lengths */ -#define NFS_PAD_WRITES 1 - #define NFSDBG_FACILITY NFSDBG_XDR /* Mapping from NFS error code to "errno" error code. 
*/ @@ -156,18 +153,7 @@ return p; } -static inline u32 * -xdr_decode_string2(u32 *p, char **string, unsigned int *len, - unsigned int maxlen) -{ - *len = ntohl(*p++); - if (*len > maxlen) - return NULL; - *string = (char *) p; - return p + XDR_QUADLEN(*len); -} - -static inline u32 * +static u32 * xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) { unsigned int type; @@ -350,35 +336,18 @@ nfs3_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args) { struct rpc_auth *auth = req->rq_task->tk_auth; - int buflen, replen; - unsigned int nr; + unsigned int replen; + u32 count = args->count; p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_hyper(p, args->offset); - *p++ = htonl(args->count); + *p++ = htonl(count); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - /* Get the number of buffers in the receive iovec */ - nr = args->nriov; - - if (nr+2 > MAX_IOVEC) { - printk(KERN_ERR "NFS: Bad number of iov's in xdr_readargs\n"); - return -EINVAL; - } - - /* set up reply iovec */ + /* Inline the page array */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2; - buflen = req->rq_rvec[0].iov_len; - req->rq_rvec[0].iov_len = replen; - - /* Copy the iovec */ - memcpy(req->rq_rvec + 1, args->iov, nr * sizeof(struct iovec)); - - req->rq_rvec[nr+1].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; - req->rq_rvec[nr+1].iov_len = buflen - replen; - req->rq_rlen = args->count + buflen; - req->rq_rnr += nr+1; - + xdr_inline_pages(&req->rq_rcv_buf, replen, + args->pages, args->pgbase, count); return 0; } @@ -388,7 +357,7 @@ static int nfs3_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) { - unsigned int nr; + struct xdr_buf *sndbuf = &req->rq_snd_buf; u32 count = args->count; p = xdr_encode_fhandle(p, args->fh); @@ -396,37 +365,10 @@ *p++ = htonl(count); *p++ = htonl(args->stable); *p++ = htonl(count); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - - /* Get the number of buffers in the send iovec */ - nr = args->nriov; - - if (nr+2 > MAX_IOVEC) { - printk(KERN_ERR "NFS: Bad number of iov's in xdr_writeargs\n"); - return -EINVAL; - } - - /* Copy the iovec */ - memcpy(req->rq_svec + 1, args->iov, nr * sizeof(struct iovec)); - -#ifdef NFS_PAD_WRITES - /* - * Some old servers require that the message length - * be a multiple of 4, so we pad it here if needed. - */ - if (count & 3) { - struct iovec *iov = req->rq_svec + nr + 1; - int pad = 4 - (count & 3); - - iov->iov_base = (void *) "\0\0\0"; - iov->iov_len = pad; - count += pad; - nr++; - } -#endif - req->rq_slen += count; - req->rq_snr += nr; + sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); + /* Copy the page array */ + xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); return 0; } @@ -530,7 +472,8 @@ nfs3_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_readdirargs *args) { struct rpc_auth *auth = req->rq_task->tk_auth; - int buflen, replen; + unsigned int replen; + u32 count = args->count; p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_hyper(p, args->cookie); @@ -539,22 +482,14 @@ if (args->plus) { /* readdirplus: need dircount + buffer size. 
* We just make sure we make dircount big enough */ - *p++ = htonl(args->bufsiz >> 3); + *p++ = htonl(count >> 3); } - *p++ = htonl(args->bufsiz); + *p++ = htonl(count); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - /* set up reply iovec */ + /* Inline the page array */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readdirres_sz) << 2; - buflen = req->rq_rvec[0].iov_len; - req->rq_rvec[0].iov_len = replen; - req->rq_rvec[1].iov_base = args->buffer; - req->rq_rvec[1].iov_len = args->bufsiz; - req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; - req->rq_rvec[2].iov_len = buflen - replen; - req->rq_rlen = buflen + args->bufsiz; - req->rq_rnr += 2; - + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); return 0; } @@ -565,11 +500,13 @@ static int nfs3_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs3_readdirres *res) { - struct iovec *iov = req->rq_rvec; - int hdrlen; - int status, nr; - unsigned int len; - u32 *entry, *end; + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct iovec *iov = rcvbuf->head; + struct page **page; + int hdrlen, recvd; + int status, nr; + unsigned int len, pglen; + u32 *entry, *end; status = ntohl(*p++); /* Decode post_op_attrs */ @@ -587,13 +524,18 @@ hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len > hdrlen) { dprintk("NFS: READDIR header is short. iovec will be shifted.\n"); - xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); } - p = (u32 *) iov[1].iov_base; - end = (u32 *) ((u8 *) p + iov[1].iov_len); + pglen = rcvbuf->page_len; + recvd = req->rq_received - hdrlen; + if (pglen > recvd) + pglen = recvd; + page = rcvbuf->pages; + p = kmap(*page); + entry = p; + end = (u32 *)((char *)p + pglen); for (nr = 0; *p++; nr++) { - entry = p - 1; if (p + 3 > end) goto short_pkt; p += 2; /* inode # */ @@ -602,7 +544,7 @@ if (len > NFS3_MAXNAMLEN) { printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n", len); - return -errno_NFSERR_IO; + goto err_unmap; } if (res->plus) { @@ -622,7 +564,7 @@ if (len > NFS3_FHSIZE) { printk(KERN_WARNING "NFS: giant filehandle in " "readdir (len %x)!\n", len); - return -errno_NFSERR_IO; + goto err_unmap; } p += XDR_QUADLEN(len); } @@ -630,14 +572,24 @@ if (p + 2 > end) goto short_pkt; + entry = p; } - + if (!nr && (entry[0] != 0 || entry[1] == 0)) + goto short_pkt; + out: + kunmap(*page); return nr; short_pkt: - printk(KERN_NOTICE "NFS: short packet in readdir reply!\n"); - /* truncate listing */ entry[0] = entry[1] = 0; - return nr; + /* truncate listing? 
*/ + if (!nr) { + printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); + entry[1] = 1; + } + goto out; +err_unmap: + kunmap(*page); + return -errno_NFSERR_IO; } u32 * @@ -772,21 +724,16 @@ static int nfs3_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkargs *args) { - struct rpc_task *task = req->rq_task; - struct rpc_auth *auth = task->tk_auth; - int buflen, replen; + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + u32 count = args->count - 4; p = xdr_encode_fhandle(p, args->fh); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readlinkres_sz) << 2; - buflen = req->rq_rvec[0].iov_len; - req->rq_rvec[0].iov_len = replen; - req->rq_rvec[1].iov_base = args->buffer; - req->rq_rvec[1].iov_len = args->bufsiz; - req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; - req->rq_rvec[2].iov_len = buflen - replen; - req->rq_rlen = buflen + args->bufsiz; - req->rq_rnr += 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); return 0; } @@ -794,17 +741,17 @@ * Decode READLINK reply */ static int -nfs3_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkres *res) +nfs3_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) { - struct iovec *iov = req->rq_rvec; - int hdrlen; - u32 *strlen; + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct iovec *iov = rcvbuf->head; + unsigned int hdrlen; + u32 *strlen, len; char *string; int status; - unsigned int len; status = ntohl(*p++); - p = xdr_decode_post_op_attr(p, res->fattr); + p = xdr_decode_post_op_attr(p, fattr); if (status != 0) return -nfs_stat_to_errno(status); @@ -812,18 +759,19 @@ hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len > hdrlen) { dprintk("NFS: READLINK header is short. iovec will be shifted.\n"); - xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); } - strlen = (u32*)res->buffer; + strlen = (u32*)kmap(rcvbuf->pages[0]); /* Convert length of symlink */ len = ntohl(*strlen); - if (len > res->bufsiz - 5) - len = res->bufsiz - 5; + if (len > rcvbuf->page_len) + len = rcvbuf->page_len; *strlen = len; /* NULL terminate the string we got */ string = (char *)(strlen + 1); string[len] = 0; + kunmap(rcvbuf->pages[0]); return 0; } @@ -857,20 +805,18 @@ hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len > hdrlen) { dprintk("NFS: READ header is short. iovec will be shifted.\n"); - xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); + xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen); } - recvd = req->rq_rlen - hdrlen; + recvd = req->rq_received - hdrlen; if (count > recvd) { printk(KERN_WARNING "NFS: server cheating in read reply: " "count %d > recvd %d\n", count, recvd); count = recvd; } - if (count < res->count) { - xdr_zero_iovec(iov+1, req->rq_rnr-2, res->count - count); + if (count < res->count) res->count = count; - } return count; } @@ -1051,37 +997,37 @@ # define MAX(a, b) (((a) > (b))? 
(a) : (b)) #endif -#define PROC(proc, argtype, restype) \ - { "nfs3_" #proc, \ - (kxdrproc_t) nfs3_xdr_##argtype, \ - (kxdrproc_t) nfs3_xdr_##restype, \ - MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ - 0 \ +#define PROC(proc, argtype, restype, timer) \ + { .p_procname = "nfs3_" #proc, \ + .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ + .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ + .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ + .p_timer = timer \ } static struct rpc_procinfo nfs3_procedures[22] = { - PROC(null, enc_void, dec_void), - PROC(getattr, fhandle, attrstat), - PROC(setattr, sattrargs, wccstat), - PROC(lookup, diropargs, lookupres), - PROC(access, accessargs, accessres), - PROC(readlink, readlinkargs, readlinkres), - PROC(read, readargs, readres), - PROC(write, writeargs, writeres), - PROC(create, createargs, createres), - PROC(mkdir, mkdirargs, createres), - PROC(symlink, symlinkargs, createres), - PROC(mknod, mknodargs, createres), - PROC(remove, diropargs, wccstat), - PROC(rmdir, diropargs, wccstat), - PROC(rename, renameargs, renameres), - PROC(link, linkargs, linkres), - PROC(readdir, readdirargs, readdirres), - PROC(readdirplus, readdirargs, readdirres), - PROC(fsstat, fhandle, fsstatres), - PROC(fsinfo, fhandle, fsinfores), - PROC(pathconf, fhandle, pathconfres), - PROC(commit, commitargs, commitres), + PROC(null, enc_void, dec_void, 0), + PROC(getattr, fhandle, attrstat, 1), + PROC(setattr, sattrargs, wccstat, 0), + PROC(lookup, diropargs, lookupres, 2), + PROC(access, accessargs, accessres, 1), + PROC(readlink, readlinkargs, readlinkres, 3), + PROC(read, readargs, readres, 3), + PROC(write, writeargs, writeres, 4), + PROC(create, createargs, createres, 0), + PROC(mkdir, mkdirargs, createres, 0), + PROC(symlink, symlinkargs, createres, 0), + PROC(mknod, mknodargs, createres, 0), + PROC(remove, diropargs, wccstat, 0), + PROC(rmdir, diropargs, wccstat, 0), + PROC(rename, renameargs, renameres, 0), + PROC(link, linkargs, linkres, 0), + PROC(readdir, readdirargs, readdirres, 3), + PROC(readdirplus, readdirargs, readdirres, 3), + PROC(fsstat, fhandle, fsstatres, 0), + PROC(fsinfo, fhandle, fsinfores, 0), + PROC(pathconf, fhandle, pathconfres, 0), + PROC(commit, commitargs, commitres, 5), }; struct rpc_version nfs_version3 = { diff -u --recursive --new-file linux-2.4.19/fs/nfs/proc.c linux-2.4.19-17-rpcbuf/fs/nfs/proc.c --- linux-2.4.19/fs/nfs/proc.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.19-17-rpcbuf/fs/nfs/proc.c Sat Oct 5 03:50:55 2002 @@ -106,15 +106,13 @@ } static int -nfs_proc_readlink(struct inode *inode, void *buffer, unsigned int bufsiz) +nfs_proc_readlink(struct inode *inode, struct page *page) { - struct nfs_readlinkargs args = { NFS_FH(inode), buffer, bufsiz }; - struct nfs_readlinkres res = { buffer, bufsiz }; + struct nfs_readlinkargs args = { NFS_FH(inode), PAGE_CACHE_SIZE, &page }; int status; dprintk("NFS call readlink\n"); - status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, - &args, &res, 0); + status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0); dprintk("NFS reply readlink: %d\n", status); return status; } @@ -122,11 +120,12 @@ static int nfs_proc_read(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, int flags, - loff_t offset, unsigned int count, void *buffer, int *eofp) + unsigned int base, unsigned int count, + struct page *page, int *eofp) { - struct nfs_readargs arg = { NFS_FH(inode), offset, count, 1, - {{ buffer, count }, {0,0}, {0,0}, {0,0}, - {0,0}, {0,0}, {0,0}, {0,0}} }; + u64 
offset = page_offset(page) + base; + struct nfs_readargs arg = { NFS_FH(inode), offset, count, + base, &page }; struct nfs_readres res = { fattr, count, 0}; struct rpc_message msg = { NFSPROC_READ, &arg, &res, cred }; int status; @@ -143,13 +142,12 @@ static int nfs_proc_write(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, int how, - loff_t offset, unsigned int count, - void *buffer, struct nfs_writeverf *verf) + unsigned int base, unsigned int count, + struct page *page, struct nfs_writeverf *verf) { - struct nfs_writeargs arg = {NFS_FH(inode), offset, count, - NFS_FILE_SYNC, 1, - {{buffer, count}, {0,0}, {0,0}, {0,0}, - {0,0}, {0,0}, {0,0}, {0,0}}}; + u64 offset = page_offset(page) + base; + struct nfs_writeargs arg = { NFS_FH(inode), offset, count, + NFS_FILE_SYNC, base, &page }; struct nfs_writeres res = {fattr, verf, count}; struct rpc_message msg = { NFSPROC_WRITE, &arg, &res, cred }; int status, flags = 0; @@ -337,21 +335,13 @@ */ static int nfs_proc_readdir(struct inode *dir, struct rpc_cred *cred, - __u64 cookie, void *entry, - unsigned int size, int plus) + __u64 cookie, struct page *page, + unsigned int count, int plus) { - struct nfs_readdirargs arg; - struct nfs_readdirres res; - struct rpc_message msg = { NFSPROC_READDIR, &arg, &res, cred }; + struct nfs_readdirargs arg = { NFS_FH(dir), cookie, count, &page }; + struct rpc_message msg = { NFSPROC_READDIR, &arg, NULL, cred }; int status; - arg.fh = NFS_FH(dir); - arg.cookie = cookie; - arg.buffer = entry; - arg.bufsiz = size; - res.buffer = entry; - res.bufsiz = size; - dprintk("NFS call readdir %d\n", (unsigned int)cookie); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); diff -u --recursive --new-file linux-2.4.19/fs/nfs/read.c linux-2.4.19-17-rpcbuf/fs/nfs/read.c --- linux-2.4.19/fs/nfs/read.c Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-17-rpcbuf/fs/nfs/read.c Sat Oct 5 03:50:55 2002 @@ -42,6 +42,7 @@ struct nfs_readres res; /* ... and result struct */ struct nfs_fattr fattr; /* fattr storage */ struct list_head pages; /* Coalesced read requests */ + struct page *pagevec[NFS_READ_MAXIOV]; }; /* @@ -63,6 +64,7 @@ if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + p->args.pages = p->pagevec; } return p; } @@ -86,8 +88,7 @@ { struct rpc_cred *cred = NULL; struct nfs_fattr fattr; - loff_t offset = page_offset(page); - char *buffer; + unsigned int offset = 0; int rsize = NFS_SERVER(inode)->rsize; int result; int count = PAGE_CACHE_SIZE; @@ -103,19 +104,18 @@ * This works now because the socket layer never tries to DMA * into this buffer directly. 
*/ - buffer = kmap(page); do { if (count < rsize) rsize = count; - dprintk("NFS: nfs_proc_read(%s, (%x/%Ld), %Ld, %d, %p)\n", + dprintk("NFS: nfs_proc_read(%s, (%x/%Ld), %u, %u, %p)\n", NFS_SERVER(inode)->hostname, inode->i_dev, (long long)NFS_FILEID(inode), - (long long)offset, rsize, buffer); + offset, rsize, page); lock_kernel(); result = NFS_PROTO(inode)->read(inode, cred, &fattr, flags, - offset, rsize, buffer, &eof); + offset, rsize, page, &eof); nfs_refresh_inode(inode, &fattr); unlock_kernel(); @@ -130,12 +130,15 @@ } count -= result; offset += result; - buffer += result; if (result < rsize) /* NFSv2ism */ break; } while (count); - memset(buffer, 0, count); + if (count) { + char *kaddr = kmap(page); + memset(kaddr + offset, 0, count); + kunmap(page); + } flush_dcache_page(page); SetPageUptodate(page); if (PageError(page)) @@ -143,7 +146,6 @@ result = 0; io_error: - kunmap(page); UnlockPage(page); return result; } @@ -186,26 +188,24 @@ nfs_read_rpcsetup(struct list_head *head, struct nfs_read_data *data) { struct nfs_page *req; - struct iovec *iov; + struct page **pages; unsigned int count; - iov = data->args.iov; + pages = data->args.pages; count = 0; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_list_add_request(req, &data->pages); - iov->iov_base = kmap(req->wb_page) + req->wb_offset; - iov->iov_len = req->wb_bytes; + *pages++ = req->wb_page; count += req->wb_bytes; - iov++; - data->args.nriov++; } req = nfs_list_entry(data->pages.next); data->inode = req->wb_inode; data->cred = req->wb_cred; data->args.fh = NFS_FH(req->wb_inode); data->args.offset = page_offset(req->wb_page) + req->wb_offset; + data->args.pgbase = req->wb_offset; data->args.count = count; data->res.fattr = &data->fattr; data->res.count = count; @@ -266,10 +266,10 @@ msg.rpc_cred = data->cred; /* Start the async call */ - dprintk("NFS: %4d initiated read call (req %x/%Ld count %d nriov %d.\n", + dprintk("NFS: %4d initiated read call (req %x/%Ld count %u.\n", task->tk_pid, inode->i_dev, (long long)NFS_FILEID(inode), - data->args.count, data->args.nriov); + data->args.count); rpc_clnt_sigmask(clnt, &oldset); rpc_call_setup(task, &msg, 0); @@ -424,7 +424,6 @@ } else SetPageError(page); flush_dcache_page(page); - kunmap(page); UnlockPage(page); dprintk("NFS: read (%x/%Ld %d@%Ld)\n", diff -u --recursive --new-file linux-2.4.19/fs/nfs/symlink.c linux-2.4.19-17-rpcbuf/fs/nfs/symlink.c --- linux-2.4.19/fs/nfs/symlink.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.19-17-rpcbuf/fs/nfs/symlink.c Sat Oct 5 03:50:55 2002 @@ -29,7 +29,6 @@ */ static int nfs_symlink_filler(struct inode *inode, struct page *page) { - void *buffer = kmap(page); int error; /* We place the length at the beginning of the page, @@ -37,13 +36,11 @@ * XDR response verification will NULL terminate it. 
*/ lock_kernel(); - error = NFS_PROTO(inode)->readlink(inode, buffer, - PAGE_CACHE_SIZE - sizeof(u32)-4); + error = NFS_PROTO(inode)->readlink(inode, page); unlock_kernel(); if (error < 0) goto error; SetPageUptodate(page); - kunmap(page); UnlockPage(page); return 0; diff -u --recursive --new-file linux-2.4.19/fs/nfs/write.c linux-2.4.19-17-rpcbuf/fs/nfs/write.c --- linux-2.4.19/fs/nfs/write.c Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-17-rpcbuf/fs/nfs/write.c Sat Oct 5 03:50:39 2002 @@ -77,6 +77,7 @@ struct nfs_fattr fattr; struct nfs_writeverf verf; struct list_head pages; /* Coalesced requests we wish to flush */ + struct page *pagevec[NFS_WRITE_MAXIOV]; }; /* @@ -105,6 +106,7 @@ if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + p->args.pages = p->pagevec; } return p; } @@ -163,7 +165,6 @@ inode->i_dev, (long long)NFS_FILEID(inode), count, (long long)(page_offset(page) + offset)); - buffer = kmap(page) + offset; base = page_offset(page) + offset; flags = ((IS_SWAPFILE(inode)) ? NFS_RW_SWAP : 0) | NFS_RW_SYNC; @@ -173,7 +174,7 @@ wsize = count; result = NFS_PROTO(inode)->write(inode, cred, &fattr, flags, - base, wsize, buffer, &verf); + offset, wsize, page, &verf); nfs_write_attributes(inode, &fattr); if (result < 0) { @@ -186,7 +187,8 @@ wsize, result); refresh = 1; buffer += wsize; - base += wsize; + base += wsize; + offset += wsize; written += wsize; count -= wsize; /* @@ -201,7 +203,6 @@ ClearPageError(page); io_error: - kunmap(page); if (cred) put_rpccred(cred); @@ -861,29 +862,27 @@ nfs_write_rpcsetup(struct list_head *head, struct nfs_write_data *data) { struct nfs_page *req; - struct iovec *iov; + struct page **pages; unsigned int count; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. 
*/ - iov = data->args.iov; + pages = data->args.pages; count = 0; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_list_add_request(req, &data->pages); - iov->iov_base = kmap(req->wb_page) + req->wb_offset; - iov->iov_len = req->wb_bytes; + *pages++ = req->wb_page; count += req->wb_bytes; - iov++; - data->args.nriov++; } req = nfs_list_entry(data->pages.next); data->inode = req->wb_inode; data->cred = req->wb_cred; data->args.fh = NFS_FH(req->wb_inode); data->args.offset = page_offset(req->wb_page) + req->wb_offset; + data->args.pgbase = req->wb_offset; data->args.count = count; data->res.fattr = &data->fattr; data->res.count = count; @@ -948,11 +947,11 @@ msg.rpc_resp = &data->res; msg.rpc_cred = data->cred; - dprintk("NFS: %4d initiated write call (req %x/%Ld count %d nriov %d)\n", + dprintk("NFS: %4d initiated write call (req %x/%Ld count %u)\n", task->tk_pid, inode->i_dev, (long long)NFS_FILEID(inode), - data->args.count, data->args.nriov); + data->args.count); rpc_clnt_sigmask(clnt, &oldset); rpc_call_setup(task, &msg, 0); @@ -1064,8 +1063,6 @@ nfs_list_remove_request(req); page = req->wb_page; - kunmap(page); - dprintk("NFS: write (%x/%Ld %d@%Ld)", req->wb_inode->i_dev, (long long)NFS_FILEID(req->wb_inode), diff -u --recursive --new-file linux-2.4.19/include/asm-i386/kmap_types.h linux-2.4.19-17-rpcbuf/include/asm-i386/kmap_types.h --- linux-2.4.19/include/asm-i386/kmap_types.h Mon Sep 17 22:16:30 2001 +++ linux-2.4.19-17-rpcbuf/include/asm-i386/kmap_types.h Sat Oct 5 03:50:55 2002 @@ -3,7 +3,7 @@ enum km_type { KM_BOUNCE_READ, - KM_SKB_DATA, + KM_SKB_SUNRPC_DATA, KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, diff -u --recursive --new-file linux-2.4.19/include/asm-mips/kmap_types.h linux-2.4.19-17-rpcbuf/include/asm-mips/kmap_types.h --- linux-2.4.19/include/asm-mips/kmap_types.h Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-17-rpcbuf/include/asm-mips/kmap_types.h Sat Oct 5 03:50:55 2002 @@ -3,7 +3,7 @@ enum km_type { KM_BOUNCE_READ, - KM_SKB_DATA, + KM_SKB_SUNRPC_DATA, KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, diff -u --recursive --new-file linux-2.4.19/include/asm-ppc/kmap_types.h linux-2.4.19-17-rpcbuf/include/asm-ppc/kmap_types.h --- linux-2.4.19/include/asm-ppc/kmap_types.h Mon Sep 17 22:16:30 2001 +++ linux-2.4.19-17-rpcbuf/include/asm-ppc/kmap_types.h Sat Oct 5 03:50:55 2002 @@ -7,7 +7,7 @@ enum km_type { KM_BOUNCE_READ, - KM_SKB_DATA, + KM_SKB_SUNRPC_DATA, KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, diff -u --recursive --new-file linux-2.4.19/include/asm-sparc/kmap_types.h linux-2.4.19-17-rpcbuf/include/asm-sparc/kmap_types.h --- linux-2.4.19/include/asm-sparc/kmap_types.h Mon Sep 17 22:16:30 2001 +++ linux-2.4.19-17-rpcbuf/include/asm-sparc/kmap_types.h Sat Oct 5 03:50:55 2002 @@ -3,7 +3,7 @@ enum km_type { KM_BOUNCE_READ, - KM_SKB_DATA, + KM_SKB_SUNRPC_DATA, KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, diff -u --recursive --new-file linux-2.4.19/include/linux/nfs_xdr.h linux-2.4.19-17-rpcbuf/include/linux/nfs_xdr.h --- linux-2.4.19/include/linux/nfs_xdr.h Mon Jan 29 21:07:43 2001 +++ linux-2.4.19-17-rpcbuf/include/linux/nfs_xdr.h Sat Oct 5 03:50:56 2002 @@ -68,8 +68,8 @@ struct nfs_fh * fh; __u64 offset; __u32 count; - unsigned int nriov; - struct iovec iov[NFS_READ_MAXIOV]; + unsigned int pgbase; + struct page ** pages; }; struct nfs_readres { @@ -87,8 +87,8 @@ __u64 offset; __u32 count; enum nfs3_stable_how stable; - unsigned int nriov; - struct iovec iov[NFS_WRITE_MAXIOV]; + unsigned int pgbase; + struct page ** pages; }; 
struct nfs_writeverf { @@ -165,8 +165,8 @@ struct nfs_readdirargs { struct nfs_fh * fh; __u32 cookie; - void * buffer; - unsigned int bufsiz; + unsigned int count; + struct page ** pages; }; struct nfs_diropok { @@ -176,18 +176,8 @@ struct nfs_readlinkargs { struct nfs_fh * fh; - void * buffer; - unsigned int bufsiz; -}; - -struct nfs_readlinkres { - void * buffer; - unsigned int bufsiz; -}; - -struct nfs_readdirres { - void * buffer; - unsigned int bufsiz; + unsigned int count; + struct page ** pages; }; struct nfs3_sattrargs { @@ -262,9 +252,9 @@ struct nfs_fh * fh; __u64 cookie; __u32 verf[2]; - void * buffer; - unsigned int bufsiz; int plus; + unsigned int count; + struct page ** pages; }; struct nfs3_diropres { @@ -280,14 +270,8 @@ struct nfs3_readlinkargs { struct nfs_fh * fh; - void * buffer; - unsigned int bufsiz; -}; - -struct nfs3_readlinkres { - struct nfs_fattr * fattr; - void * buffer; - unsigned int bufsiz; + unsigned int count; + struct page ** pages; }; struct nfs3_renameres { @@ -303,8 +287,6 @@ struct nfs3_readdirres { struct nfs_fattr * dir_attr; __u32 * verf; - void * buffer; - unsigned int bufsiz; int plus; }; @@ -322,15 +304,15 @@ int (*lookup) (struct inode *, struct qstr *, struct nfs_fh *, struct nfs_fattr *); int (*access) (struct inode *, int , int); - int (*readlink)(struct inode *, void *, unsigned int); + int (*readlink)(struct inode *, struct page *); int (*read) (struct inode *, struct rpc_cred *, struct nfs_fattr *, - int, loff_t, unsigned int, - void *buffer, int *eofp); + int, unsigned int, unsigned int, + struct page *, int *eofp); int (*write) (struct inode *, struct rpc_cred *, struct nfs_fattr *, - int, loff_t, unsigned int, - void *buffer, struct nfs_writeverf *verfp); + int, unsigned int, unsigned int, + struct page *, struct nfs_writeverf *verfp); int (*commit) (struct inode *, struct nfs_fattr *, unsigned long, unsigned int); int (*create) (struct inode *, struct qstr *, struct iattr *, @@ -349,7 +331,7 @@ struct nfs_fh *, struct nfs_fattr *); int (*rmdir) (struct inode *, struct qstr *); int (*readdir) (struct inode *, struct rpc_cred *, - u64, void *, unsigned int, int); + u64, struct page *, unsigned int, int); int (*mknod) (struct inode *, struct qstr *, struct iattr *, dev_t, struct nfs_fh *, struct nfs_fattr *); int (*statfs) (struct nfs_server *, struct nfs_fh *, diff -u --recursive --new-file linux-2.4.19/include/linux/sunrpc/clnt.h linux-2.4.19-17-rpcbuf/include/linux/sunrpc/clnt.h --- linux-2.4.19/include/linux/sunrpc/clnt.h Mon Feb 25 20:38:13 2002 +++ linux-2.4.19-17-rpcbuf/include/linux/sunrpc/clnt.h Sat Oct 5 04:01:57 2002 @@ -15,6 +15,7 @@ #include #include #include +#include /* * This defines an RPC port mapping @@ -51,6 +52,8 @@ unsigned int cl_flags; /* misc client flags */ unsigned long cl_hardmax; /* max hard timeout */ + struct rpc_rtt cl_rtt; /* RTO estimator data */ + struct rpc_portmap cl_pmap; /* port mapping */ struct rpc_wait_queue cl_bindwait; /* waiting on getport() */ @@ -90,6 +93,7 @@ kxdrproc_t p_decode; /* XDR decode function */ unsigned int p_bufsiz; /* req. 
buffer size */
 unsigned int p_count; /* call count */
+ unsigned int p_timer; /* Which RTT timer to use */
 };
 
 #define rpcproc_bufsiz(clnt, proc) ((clnt)->cl_procinfo[proc].p_bufsiz)
@@ -97,6 +101,7 @@
 #define rpcproc_decode(clnt, proc) ((clnt)->cl_procinfo[proc].p_decode)
 #define rpcproc_name(clnt, proc) ((clnt)->cl_procinfo[proc].p_procname)
 #define rpcproc_count(clnt, proc) ((clnt)->cl_procinfo[proc].p_count)
+#define rpcproc_timer(clnt, proc) ((clnt)->cl_procinfo[proc].p_timer)
 
 #define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt))
 #define RPC_PEERADDR(clnt) (&(clnt)->cl_xprt->addr)
@@ -121,6 +126,7 @@
 void rpc_restart_call(struct rpc_task *);
 void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
 void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
+void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
 
 static __inline__ int
 rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
diff -u --recursive --new-file linux-2.4.19/include/linux/sunrpc/sched.h linux-2.4.19-17-rpcbuf/include/linux/sunrpc/sched.h
--- linux-2.4.19/include/linux/sunrpc/sched.h Thu Nov 22 20:46:19 2001
+++ linux-2.4.19-17-rpcbuf/include/linux/sunrpc/sched.h Sat Oct 5 03:51:30 2002
@@ -77,9 +77,7 @@
 wait_queue_head_t tk_wait; /* sync: sleep on this q */
 unsigned long tk_timeout; /* timeout for rpc_sleep() */
 unsigned short tk_flags; /* misc flags */
- unsigned short tk_lock; /* Task lock counter */
- unsigned char tk_active : 1,/* Task has been activated */
- tk_wakeup : 1;/* Task waiting to wake up */
+ unsigned char tk_active : 1;/* Task has been activated */
 unsigned long tk_runstate; /* Task run status */
 #ifdef RPC_DEBUG
 unsigned short tk_pid; /* debugging aid */
@@ -161,15 +159,11 @@
 void rpc_remove_wait_queue(struct rpc_task *);
 void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
 rpc_action action, rpc_action timer);
-void rpc_sleep_locked(struct rpc_wait_queue *, struct rpc_task *,
- rpc_action action, rpc_action timer);
 void rpc_add_timer(struct rpc_task *, rpc_action);
 void rpc_wake_up_task(struct rpc_task *);
 void rpc_wake_up(struct rpc_wait_queue *);
 struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
 void rpc_wake_up_status(struct rpc_wait_queue *, int);
-int __rpc_lock_task(struct rpc_task *);
-void rpc_unlock_task(struct rpc_task *);
 void rpc_delay(struct rpc_task *, unsigned long);
 void * rpc_allocate(unsigned int flags, unsigned int);
 void rpc_free(void *);
diff -u --recursive --new-file linux-2.4.19/include/linux/sunrpc/timer.h linux-2.4.19-17-rpcbuf/include/linux/sunrpc/timer.h
--- linux-2.4.19/include/linux/sunrpc/timer.h Thu Jan 1 01:00:00 1970
+++ linux-2.4.19-17-rpcbuf/include/linux/sunrpc/timer.h Sat Oct 5 04:01:57 2002
@@ -0,0 +1,41 @@
+/*
+ * linux/include/linux/sunrpc/timer.h
+ *
+ * Declarations for the RPC transport timer.
+ *
+ * Copyright (C) 2002 Trond Myklebust
+ */
+
+#ifndef _LINUX_SUNRPC_TIMER_H
+#define _LINUX_SUNRPC_TIMER_H
+
+#include
+
+struct rpc_rtt {
+ long timeo; /* default timeout value */
+ long srtt[5]; /* smoothed round trip time << 3 */
+ long sdrtt[5]; /* smoothed medium deviation of RTT */
+ atomic_t ntimeouts; /* Global count of the number of timeouts */
+};
+
+
+extern void rpc_init_rtt(struct rpc_rtt *rt, long timeo);
+extern void rpc_update_rtt(struct rpc_rtt *rt, int timer, long m);
+extern long rpc_calc_rto(struct rpc_rtt *rt, int timer);
+
+static inline void rpc_inc_timeo(struct rpc_rtt *rt)
+{
+ atomic_inc(&rt->ntimeouts);
+}
+
+static inline void rpc_clear_timeo(struct rpc_rtt *rt)
+{
+ atomic_set(&rt->ntimeouts, 0);
+}
+
+static inline int rpc_ntimeo(struct rpc_rtt *rt)
+{
+ return atomic_read(&rt->ntimeouts);
+}
+
+#endif /* _LINUX_SUNRPC_TIMER_H */
diff -u --recursive --new-file linux-2.4.19/include/linux/sunrpc/xdr.h linux-2.4.19-17-rpcbuf/include/linux/sunrpc/xdr.h
--- linux-2.4.19/include/linux/sunrpc/xdr.h Thu Nov 22 20:47:20 2001
+++ linux-2.4.19-17-rpcbuf/include/linux/sunrpc/xdr.h Sat Oct 5 04:01:57 2002
@@ -34,6 +34,31 @@
 typedef int (*kxdrproc_t)(void *rqstp, u32 *data, void *obj);
 
 /*
+ * Basic structure for transmission/reception of a client XDR message.
+ * Features a header (for a linear buffer containing RPC headers
+ * and the data payload for short messages), and then an array of
+ * pages.
+ * The tail iovec allows you to append data after the page array. Its
+ * main interest is for appending padding to the pages in order to
+ * satisfy the int_32-alignment requirements in RFC1832.
+ *
+ * For the future, we might want to string several of these together
+ * in a list if anybody wants to make use of NFSv4 COMPOUND
+ * operations and/or has a need for scatter/gather involving pages.
+ */
+struct xdr_buf {
+ struct iovec head[1], /* RPC header + non-page data */
+ tail[1]; /* Appended after page data */
+
+ struct page ** pages; /* Array of contiguous pages */
+ unsigned int page_base, /* Start of page data */
+ page_len; /* Length of page data */
+
+ unsigned int len; /* Total length of data */
+
+};
+
+/*
 * pre-xdr'ed macros.
 */
@@ -67,6 +92,11 @@
 u32 * xdr_decode_netobj(u32 *p, struct xdr_netobj *);
 u32 * xdr_decode_netobj_fixed(u32 *p, void *obj, unsigned int len);
 
+void xdr_encode_pages(struct xdr_buf *, struct page **, unsigned int,
+ unsigned int);
+void xdr_inline_pages(struct xdr_buf *, unsigned int,
+ struct page **, unsigned int, unsigned int);
+
 /*
 * Decode 64bit quantities (NFSv3 support)
 */
@@ -98,6 +128,40 @@
 void xdr_shift_iovec(struct iovec *, int, size_t);
 void xdr_zero_iovec(struct iovec *, int, size_t);
 
+/*
+ * Maximum number of iov's we use.
+ */
+#define MAX_IOVEC (12)
+
+/*
+ * XDR buffer helper functions
+ */
+extern int xdr_kmap(struct iovec *, struct xdr_buf *, unsigned int);
+extern void xdr_kunmap(struct xdr_buf *, unsigned int);
+extern void xdr_shift_buf(struct xdr_buf *, unsigned int);
+extern void xdr_zero_buf(struct xdr_buf *, unsigned int);
+
+/*
+ * Helper structure for copying from an sk_buff.
+ */
+typedef struct {
+ struct sk_buff *skb;
+ unsigned int offset;
+ size_t count;
+ unsigned int csum;
+} skb_reader_t;
+
+typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len);
+
+extern void xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
+ skb_reader_t *, skb_read_actor_t);
+
+extern int xdr_copy_skb(struct xdr_buf *xdr, unsigned int base,
+ struct sk_buff *skb, unsigned int offset);
+
+extern int xdr_copy_and_csum_skb(struct xdr_buf *xdr, unsigned int base,
+ struct sk_buff *skb, unsigned int offset, unsigned int csum);
+
 #endif /* __KERNEL__ */
 
 #endif /* _SUNRPC_XDR_H_ */
diff -u --recursive --new-file linux-2.4.19/include/linux/sunrpc/xprt.h linux-2.4.19-17-rpcbuf/include/linux/sunrpc/xprt.h
--- linux-2.4.19/include/linux/sunrpc/xprt.h Sat Aug 3 02:39:46 2002
+++ linux-2.4.19-17-rpcbuf/include/linux/sunrpc/xprt.h Sat Oct 5 04:01:57 2002
@@ -13,17 +13,13 @@
 #include
 #include
 #include
-
-/*
- * Maximum number of iov's we use.
- */
-#define MAX_IOVEC 10
+#include
 
 /*
 * The transport code maintains an estimate on the maximum number of out-
 * standing RPC requests, using a smoothed version of the congestion
 * avoidance implemented in 44BSD. This is basically the Van Jacobson
- * slow start algorithm: If a retransmit occurs, the congestion window is
+ * congestion algorithm: If a retransmit occurs, the congestion window is
 * halved; otherwise, it is incremented by 1/cwnd when
 *
 * - a reply is received and
@@ -36,15 +32,13 @@
 * Note: on machines with low memory we should probably use a smaller
 * MAXREQS value: At 32 outstanding reqs with 8 megs of RAM, fragment
 * reassembly will frequently run out of memory.
- * Come Linux 2.3, we'll handle fragments directly.
 */
-#define RPC_MAXCONG 16
-#define RPC_MAXREQS (RPC_MAXCONG + 1)
-#define RPC_CWNDSCALE 256
+#define RPC_MAXCONG (16)
+#define RPC_MAXREQS RPC_MAXCONG
+#define RPC_CWNDSCALE (256)
 #define RPC_MAXCWND (RPC_MAXCONG * RPC_CWNDSCALE)
 #define RPC_INITCWND RPC_CWNDSCALE
-#define RPCXPRT_CONGESTED(xprt) \
- ((xprt)->cong >= (xprt)->cwnd)
+#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
 
 /* Default timeout values */
 #define RPC_MAX_UDP_TIMEOUT (60*HZ)
@@ -63,22 +57,12 @@
 unsigned long to_current, /* current timeout */
 to_initval, /* initial timeout */
 to_maxval, /* max timeout */
- to_increment, /* if !exponential */
- to_resrvval; /* reserve timeout */
+ to_increment; /* if !exponential */
 short to_retries; /* max # of retries */
 unsigned char to_exponential;
 };
 
 /*
- * This is the RPC buffer
- */
-struct rpc_iov {
- struct iovec io_vec[MAX_IOVEC];
- unsigned int io_nr;
- unsigned int io_len;
-};
-
-/*
 * This describes a complete RPC request
 */
 struct rpc_rqst {
@@ -87,8 +71,8 @@
 */
 struct rpc_xprt * rq_xprt; /* RPC client */
 struct rpc_timeout rq_timeout; /* timeout parms */
- struct rpc_iov rq_snd_buf; /* send buffer */
- struct rpc_iov rq_rcv_buf; /* recv buffer */
+ struct xdr_buf rq_snd_buf; /* send buffer */
+ struct xdr_buf rq_rcv_buf; /* recv buffer */
 
 /*
 * This is the private part
@@ -96,7 +80,10 @@
 struct rpc_task * rq_task; /* RPC task data */
 __u32 rq_xid; /* request XID */
 struct rpc_rqst * rq_next; /* free list */
- volatile unsigned char rq_received : 1;/* receive completed */
+ int rq_cong; /* has incremented xprt->cong */
+ int rq_received; /* receive completed */
+
+ struct list_head rq_list;
 
 /*
 * For authentication (e.g.
auth_des) @@ -109,16 +96,14 @@ u32 rq_bytes_sent; /* Bytes we have sent */ -#ifdef RPC_PROFILE - unsigned long rq_xtime; /* when transmitted */ -#endif + long rq_xtime; /* when transmitted */ + int rq_ntimeo; + int rq_nresend; }; -#define rq_svec rq_snd_buf.io_vec -#define rq_snr rq_snd_buf.io_nr -#define rq_slen rq_snd_buf.io_len -#define rq_rvec rq_rcv_buf.io_vec -#define rq_rnr rq_rcv_buf.io_nr -#define rq_rlen rq_rcv_buf.io_len +#define rq_svec rq_snd_buf.head +#define rq_slen rq_snd_buf.len +#define rq_rvec rq_rcv_buf.head +#define rq_rlen rq_rcv_buf.len #define XPRT_LAST_FRAG (1 << 0) #define XPRT_COPY_RECM (1 << 1) @@ -135,9 +120,12 @@ unsigned long cong; /* current congestion */ unsigned long cwnd; /* congestion window */ - unsigned long congtime; /* hold cwnd until then */ + + unsigned int rcvsize, /* socket receive buffer size */ + sndsize; /* socket send buffer size */ struct rpc_wait_queue sending; /* requests waiting to send */ + struct rpc_wait_queue resend; /* requests waiting to resend */ struct rpc_wait_queue pending; /* requests in flight */ struct rpc_wait_queue backlog; /* waiting for slot */ struct rpc_rqst * free; /* free slots */ @@ -164,6 +152,8 @@ spinlock_t xprt_lock; /* lock xprt info */ struct rpc_task * snd_task; /* Task blocked in send */ + struct list_head recv; + void (*old_data_ready)(struct sock *, int); void (*old_state_change)(struct sock *); @@ -182,20 +172,16 @@ void xprt_set_timeout(struct rpc_timeout *, unsigned int, unsigned long); -int xprt_reserve(struct rpc_task *); +void xprt_reserve(struct rpc_task *); void xprt_transmit(struct rpc_task *); void xprt_receive(struct rpc_task *); int xprt_adjust_timeout(struct rpc_timeout *); void xprt_release(struct rpc_task *); void xprt_reconnect(struct rpc_task *); int xprt_clear_backlog(struct rpc_xprt *); +void xprt_sock_setbufsize(struct rpc_xprt *); -#define XPRT_WSPACE 0 -#define XPRT_CONNECT 1 - -#define xprt_wspace(xp) (test_bit(XPRT_WSPACE, &(xp)->sockstate)) -#define xprt_test_and_set_wspace(xp) (test_and_set_bit(XPRT_WSPACE, &(xp)->sockstate)) -#define xprt_clear_wspace(xp) (clear_bit(XPRT_WSPACE, &(xp)->sockstate)) +#define XPRT_CONNECT 0 #define xprt_connected(xp) (!(xp)->stream || test_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate)) diff -u --recursive --new-file linux-2.4.19/net/sunrpc/Makefile linux-2.4.19-17-rpcbuf/net/sunrpc/Makefile --- linux-2.4.19/net/sunrpc/Makefile Fri Dec 29 23:07:24 2000 +++ linux-2.4.19-17-rpcbuf/net/sunrpc/Makefile Sat Oct 5 03:51:46 2002 @@ -14,7 +14,7 @@ obj-y := clnt.o xprt.o sched.o \ auth.o auth_null.o auth_unix.o \ svc.o svcsock.o svcauth.o \ - pmap_clnt.o xdr.o sunrpc_syms.o + pmap_clnt.o timer.o xdr.o sunrpc_syms.o obj-$(CONFIG_PROC_FS) += stats.o obj-$(CONFIG_SYSCTL) += sysctl.o diff -u --recursive --new-file linux-2.4.19/net/sunrpc/clnt.c linux-2.4.19-17-rpcbuf/net/sunrpc/clnt.c --- linux-2.4.19/net/sunrpc/clnt.c Fri Sep 21 20:24:50 2001 +++ linux-2.4.19-17-rpcbuf/net/sunrpc/clnt.c Sat Oct 5 03:53:49 2002 @@ -43,6 +43,7 @@ static DECLARE_WAIT_QUEUE_HEAD(destroy_wait); +static void call_start(struct rpc_task *task); static void call_reserve(struct rpc_task *task); static void call_reserveresult(struct rpc_task *task); static void call_allocate(struct rpc_task *task); @@ -108,6 +109,8 @@ if (!clnt->cl_port) clnt->cl_autobind = 1; + rpc_init_rtt(&clnt->cl_rtt, xprt->timeout.to_initval); + if (!rpcauth_create(flavor, clnt)) goto out_no_auth; @@ -328,13 +331,23 @@ rpcauth_bindcred(task); if 
(task->tk_status == 0) - task->tk_action = call_reserve; + task->tk_action = call_start; else task->tk_action = NULL; +} - /* Increment call count */ - if (task->tk_msg.rpc_proc < task->tk_client->cl_maxproc) - rpcproc_count(task->tk_client, task->tk_msg.rpc_proc)++; +void +rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) +{ + struct rpc_xprt *xprt = clnt->cl_xprt; + + xprt->sndsize = 0; + if (sndsize) + xprt->sndsize = sndsize + RPC_SLACK_SPACE; + xprt->rcvsize = 0; + if (rcvsize) + xprt->rcvsize = rcvsize + RPC_SLACK_SPACE; + xprt_sock_setbufsize(xprt); } /* @@ -347,26 +360,46 @@ if (RPC_ASSASSINATED(task)) return; - task->tk_action = call_reserve; - rpcproc_count(task->tk_client, task->tk_msg.rpc_proc)++; + task->tk_action = call_start; } /* - * 1. Reserve an RPC call slot + * 0. Initial state + * + * Other FSM states can be visited zero or more times, but + * this state is visited exactly once for each RPC. */ static void -call_reserve(struct rpc_task *task) +call_start(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; if (task->tk_msg.rpc_proc > clnt->cl_maxproc) { - printk(KERN_WARNING "%s (vers %d): bad procedure number %d\n", - clnt->cl_protname, clnt->cl_vers, task->tk_msg.rpc_proc); + printk(KERN_ERR "%s (vers %d): bad procedure number %d\n", + clnt->cl_protname, clnt->cl_vers, + task->tk_msg.rpc_proc); rpc_exit(task, -EIO); return; } + dprintk("RPC: %4d call_start %s%d proc %d (%s)\n", task->tk_pid, + clnt->cl_protname, clnt->cl_vers, task->tk_msg.rpc_proc, + (RPC_IS_ASYNC(task) ? "async" : "sync")); + + /* Increment call count */ + rpcproc_count(clnt, task->tk_msg.rpc_proc)++; + clnt->cl_stats->rpccnt++; + task->tk_action = call_reserve; +} + +/* + * 1. Reserve an RPC call slot + */ +static void +call_reserve(struct rpc_task *task) +{ dprintk("RPC: %4d call_reserve\n", task->tk_pid); + if (!rpcauth_uptodatecred(task)) { task->tk_action = call_refresh; return; @@ -374,8 +407,6 @@ task->tk_status = 0; task->tk_action = call_reserveresult; - task->tk_timeout = clnt->cl_timeout.to_resrvval; - clnt->cl_stats->rpccnt++; xprt_reserve(task); } @@ -389,38 +420,46 @@ dprintk("RPC: %4d call_reserveresult (status %d)\n", task->tk_pid, task->tk_status); + /* * After a call to xprt_reserve(), we must have either * a request slot or else an error status. */ - if ((task->tk_status >= 0 && !task->tk_rqstp) || - (task->tk_status < 0 && task->tk_rqstp)) - printk(KERN_ERR "call_reserveresult: status=%d, request=%p??\n", - task->tk_status, task->tk_rqstp); + task->tk_status = 0; + if (status >= 0) { + if (task->tk_rqstp) { + task->tk_action = call_allocate; + return; + } - if (task->tk_status >= 0) { - task->tk_action = call_allocate; + printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n", + __FUNCTION__, status); + rpc_exit(task, -EIO); return; } - task->tk_status = 0; + /* + * Even though there was an error, we may have acquired + * a request slot somehow. Make sure not to leak it. 
+ */ + if (task->tk_rqstp) { + printk(KERN_ERR "%s: status=%d, request allocated anyway\n", + __FUNCTION__, status); + xprt_release(task); + } + switch (status) { - case -EAGAIN: - case -ENOBUFS: - task->tk_timeout = task->tk_client->cl_timeout.to_resrvval; + case -EAGAIN: /* woken up; retry */ task->tk_action = call_reserve; - break; - case -ETIMEDOUT: - dprintk("RPC: task timed out\n"); - task->tk_action = call_timeout; + return; + case -EIO: /* probably a shutdown */ break; default: - if (!task->tk_rqstp) { - printk(KERN_INFO "RPC: task has no request, exit EIO\n"); - rpc_exit(task, -EIO); - } else - rpc_exit(task, status); + printk(KERN_ERR "%s: unrecognized error %d, exiting\n", + __FUNCTION__, status); + break; } + rpc_exit(task, status); } /* @@ -465,6 +504,8 @@ { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; + struct xdr_buf *sndbuf = &req->rq_snd_buf; + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; unsigned int bufsiz; kxdrproc_t encode; int status; @@ -477,14 +518,16 @@ /* Default buffer setup */ bufsiz = rpcproc_bufsiz(clnt, task->tk_msg.rpc_proc)+RPC_SLACK_SPACE; - req->rq_svec[0].iov_base = (void *)task->tk_buffer; - req->rq_svec[0].iov_len = bufsiz; - req->rq_slen = 0; - req->rq_snr = 1; - req->rq_rvec[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); - req->rq_rvec[0].iov_len = bufsiz; - req->rq_rlen = bufsiz; - req->rq_rnr = 1; + sndbuf->head[0].iov_base = (void *)task->tk_buffer; + sndbuf->head[0].iov_len = bufsiz; + sndbuf->tail[0].iov_len = 0; + sndbuf->page_len = 0; + sndbuf->len = 0; + rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); + rcvbuf->head[0].iov_len = bufsiz; + rcvbuf->tail[0].iov_len = 0; + rcvbuf->page_len = 0; + rcvbuf->len = bufsiz; /* Zero buffer so we have automatic zero-padding of opaque & string */ memset(task->tk_buffer, 0, bufsiz); @@ -511,6 +554,9 @@ struct rpc_clnt *clnt = task->tk_client; struct rpc_xprt *xprt = clnt->cl_xprt; + dprintk("RPC: %4d call_bind xprt %p %s connected\n", task->tk_pid, + xprt, (xprt_connected(xprt) ? "is" : "is not")); + task->tk_action = (xprt_connected(xprt)) ? 
call_transmit : call_reconnect; if (!clnt->cl_port) { @@ -575,7 +621,7 @@ if (task->tk_status < 0) return; xprt_transmit(task); - if (!rpcproc_decode(clnt, task->tk_msg.rpc_proc)) { + if (!rpcproc_decode(clnt, task->tk_msg.rpc_proc) && task->tk_status >= 0) { task->tk_action = NULL; rpc_wake_up_task(task); } @@ -589,19 +635,22 @@ { struct rpc_clnt *clnt = task->tk_client; struct rpc_xprt *xprt = clnt->cl_xprt; - struct rpc_rqst *req; - int status = task->tk_status; + struct rpc_rqst *req = task->tk_rqstp; + int status; + + if (req->rq_received != 0) + task->tk_status = req->rq_received; dprintk("RPC: %4d call_status (status %d)\n", task->tk_pid, task->tk_status); + status = task->tk_status; if (status >= 0) { task->tk_action = call_decode; return; } task->tk_status = 0; - req = task->tk_rqstp; switch(status) { case -ETIMEDOUT: task->tk_action = call_timeout; @@ -626,7 +675,6 @@ case -ENOMEM: case -EAGAIN: task->tk_action = call_transmit; - clnt->cl_stats->rpcretrans++; break; default: if (clnt->cl_chatty) @@ -645,20 +693,15 @@ call_timeout(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - struct rpc_rqst *req = task->tk_rqstp; + struct rpc_timeout *to = &task->tk_rqstp->rq_timeout; - if (req) { - struct rpc_timeout *to = &req->rq_timeout; - - if (xprt_adjust_timeout(to)) { - dprintk("RPC: %4d call_timeout (minor timeo)\n", - task->tk_pid); - goto minor_timeout; - } - to->to_retries = clnt->cl_timeout.to_retries; + if (xprt_adjust_timeout(to)) { + dprintk("RPC: %4d call_timeout (minor)\n", task->tk_pid); + goto retry; } + to->to_retries = clnt->cl_timeout.to_retries; - dprintk("RPC: %4d call_timeout (major timeo)\n", task->tk_pid); + dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid); if (clnt->cl_softrtry) { if (clnt->cl_chatty && !task->tk_exit) printk(KERN_NOTICE "%s: server %s not responding, timed out\n", @@ -666,33 +709,18 @@ rpc_exit(task, -EIO); return; } - if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) { + + if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN) && rpc_ntimeo(&clnt->cl_rtt) > 7) { task->tk_flags |= RPC_CALL_MAJORSEEN; - if (req) - printk(KERN_NOTICE "%s: server %s not responding, still trying\n", - clnt->cl_protname, clnt->cl_server); -#ifdef RPC_DEBUG - else - printk(KERN_NOTICE "%s: task %d can't get a request slot\n", - clnt->cl_protname, task->tk_pid); -#endif + printk(KERN_NOTICE "%s: server %s not responding, still trying\n", + clnt->cl_protname, clnt->cl_server); } if (clnt->cl_autobind) clnt->cl_port = 0; -minor_timeout: - if (!req) - task->tk_action = call_reserve; - else if (!clnt->cl_port) { - task->tk_action = call_bind; - clnt->cl_stats->rpcretrans++; - } else if (!xprt_connected(clnt->cl_xprt)) { - task->tk_action = call_reconnect; - clnt->cl_stats->rpcretrans++; - } else { - task->tk_action = call_transmit; - clnt->cl_stats->rpcretrans++; - } +retry: + clnt->cl_stats->rpcretrans++; + task->tk_action = call_bind; task->tk_status = 0; } diff -u --recursive --new-file linux-2.4.19/net/sunrpc/sched.c linux-2.4.19-17-rpcbuf/net/sunrpc/sched.c --- linux-2.4.19/net/sunrpc/sched.c Sat Aug 3 02:39:46 2002 +++ linux-2.4.19-17-rpcbuf/net/sunrpc/sched.c Sat Oct 5 03:51:30 2002 @@ -73,7 +73,7 @@ * Spinlock for wait queues. Access to the latter also has to be * interrupt-safe in order to allow timers to wake up sleeping tasks. */ -spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED; /* * Spinlock for other critical sections of code. 
*/ @@ -157,7 +157,7 @@ void rpc_add_timer(struct rpc_task *task, rpc_action timer) { spin_lock_bh(&rpc_queue_lock); - if (!(RPC_IS_RUNNING(task) || task->tk_wakeup)) + if (!RPC_IS_RUNNING(task)) __rpc_add_timer(task, timer); spin_unlock_bh(&rpc_queue_lock); } @@ -358,27 +358,10 @@ spin_unlock_bh(&rpc_queue_lock); } -void -rpc_sleep_locked(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action, rpc_action timer) -{ - /* - * Protect the queue operations. - */ - spin_lock_bh(&rpc_queue_lock); - __rpc_sleep_on(q, task, action, timer); - __rpc_lock_task(task); - spin_unlock_bh(&rpc_queue_lock); -} - /** * __rpc_wake_up_task - wake up a single rpc_task * @task: task to be woken up * - * If the task is locked, it is merely removed from the queue, and - * 'task->tk_wakeup' is set. rpc_unlock_task() will then ensure - * that it is woken up as soon as the lock count goes to zero. - * * Caller must hold rpc_queue_lock */ static void @@ -407,14 +390,6 @@ if (task->tk_rpcwait != &schedq) __rpc_remove_wait_queue(task); - /* If the task has been locked, then set tk_wakeup so that - * rpc_unlock_task() wakes us up... */ - if (task->tk_lock) { - task->tk_wakeup = 1; - return; - } else - task->tk_wakeup = 0; - rpc_make_runnable(task); dprintk("RPC: __rpc_wake_up_task done\n"); @@ -497,30 +472,6 @@ } /* - * Lock down a sleeping task to prevent it from waking up - * and disappearing from beneath us. - * - * This function should always be called with the - * rpc_queue_lock held. - */ -int -__rpc_lock_task(struct rpc_task *task) -{ - if (!RPC_IS_RUNNING(task)) - return ++task->tk_lock; - return 0; -} - -void -rpc_unlock_task(struct rpc_task *task) -{ - spin_lock_bh(&rpc_queue_lock); - if (task->tk_lock && !--task->tk_lock && task->tk_wakeup) - __rpc_wake_up_task(task); - spin_unlock_bh(&rpc_queue_lock); -} - -/* * Run a task at a later time */ static void __rpc_atrun(struct rpc_task *); @@ -704,15 +655,7 @@ spin_unlock_bh(&rpc_queue_lock); break; } - if (task->tk_lock) { - spin_unlock_bh(&rpc_queue_lock); - printk(KERN_ERR "RPC: Locked task was scheduled !!!!\n"); -#ifdef RPC_DEBUG - rpc_debug = ~0; - rpc_show_tasks(); -#endif - break; - } + __rpc_remove_wait_queue(task); spin_unlock_bh(&rpc_queue_lock); diff -u --recursive --new-file linux-2.4.19/net/sunrpc/sunrpc_syms.c linux-2.4.19-17-rpcbuf/net/sunrpc/sunrpc_syms.c --- linux-2.4.19/net/sunrpc/sunrpc_syms.c Fri Sep 21 06:02:01 2001 +++ linux-2.4.19-17-rpcbuf/net/sunrpc/sunrpc_syms.c Sat Oct 5 03:53:49 2002 @@ -50,6 +50,7 @@ EXPORT_SYMBOL(rpc_clnt_sigunmask); EXPORT_SYMBOL(rpc_delay); EXPORT_SYMBOL(rpc_restart_call); +EXPORT_SYMBOL(rpc_setbufsize); /* Client transport */ EXPORT_SYMBOL(xprt_create_proto); @@ -95,8 +96,9 @@ EXPORT_SYMBOL(xdr_decode_string_inplace); EXPORT_SYMBOL(xdr_decode_netobj); EXPORT_SYMBOL(xdr_encode_netobj); -EXPORT_SYMBOL(xdr_shift_iovec); -EXPORT_SYMBOL(xdr_zero_iovec); +EXPORT_SYMBOL(xdr_encode_pages); +EXPORT_SYMBOL(xdr_inline_pages); +EXPORT_SYMBOL(xdr_shift_buf); /* Debugging symbols */ #ifdef RPC_DEBUG diff -u --recursive --new-file linux-2.4.19/net/sunrpc/timer.c linux-2.4.19-17-rpcbuf/net/sunrpc/timer.c --- linux-2.4.19/net/sunrpc/timer.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.19-17-rpcbuf/net/sunrpc/timer.c Sat Oct 5 03:51:50 2002 @@ -0,0 +1,74 @@ +#include +#include +#include + +#include +#include +#include + +#define RPC_RTO_MAX (60*HZ) +#define RPC_RTO_INIT (HZ/5) +#define RPC_RTO_MIN (2) + +void +rpc_init_rtt(struct rpc_rtt *rt, long timeo) +{ + long t = (timeo - RPC_RTO_INIT) << 3; + int i; + rt->timeo = 
timeo; + if (t < 0) + t = 0; + for (i = 0; i < 5; i++) { + rt->srtt[i] = t; + rt->sdrtt[i] = RPC_RTO_INIT; + } + atomic_set(&rt->ntimeouts, 0); +} + +void +rpc_update_rtt(struct rpc_rtt *rt, int timer, long m) +{ + long *srtt, *sdrtt; + + if (timer-- == 0) + return; + + if (m == 0) + m = 1; + srtt = &rt->srtt[timer]; + m -= *srtt >> 3; + *srtt += m; + if (m < 0) + m = -m; + sdrtt = &rt->sdrtt[timer]; + m -= *sdrtt >> 2; + *sdrtt += m; + /* Set lower bound on the variance */ + if (*sdrtt < RPC_RTO_MIN) + *sdrtt = RPC_RTO_MIN; +} + +/* + * Estimate rto for an nfs rpc sent via. an unreliable datagram. + * Use the mean and mean deviation of rtt for the appropriate type of rpc + * for the frequent rpcs and a default for the others. + * The justification for doing "other" this way is that these rpcs + * happen so infrequently that timer est. would probably be stale. + * Also, since many of these rpcs are + * non-idempotent, a conservative timeout is desired. + * getattr, lookup, + * read, write, commit - A+4D + * other - timeo + */ + +long +rpc_calc_rto(struct rpc_rtt *rt, int timer) +{ + long res; + if (timer-- == 0) + return rt->timeo; + res = (rt->srtt[timer] >> 3) + rt->sdrtt[timer]; + if (res > RPC_RTO_MAX) + res = RPC_RTO_MAX; + return res; +} diff -u --recursive --new-file linux-2.4.19/net/sunrpc/xdr.c linux-2.4.19-17-rpcbuf/net/sunrpc/xdr.c --- linux-2.4.19/net/sunrpc/xdr.c Mon Oct 1 18:19:56 2001 +++ linux-2.4.19-17-rpcbuf/net/sunrpc/xdr.c Fri Nov 1 18:36:01 2002 @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include #include @@ -100,6 +102,46 @@ } +void +xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, + unsigned int len) +{ + xdr->pages = pages; + xdr->page_base = base; + xdr->page_len = len; + + if (len & 3) { + struct iovec *iov = xdr->tail; + unsigned int pad = 4 - (len & 3); + + iov->iov_base = (void *) "\0\0\0"; + iov->iov_len = pad; + len += pad; + } + xdr->len += len; +} + +void +xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, + struct page **pages, unsigned int base, unsigned int len) +{ + struct iovec *head = xdr->head; + struct iovec *tail = xdr->tail; + char *buf = (char *)head->iov_base; + unsigned int buflen = head->iov_len; + + head->iov_len = offset; + + xdr->pages = pages; + xdr->page_base = base; + xdr->page_len = len; + + tail->iov_base = buf + offset; + tail->iov_len = buflen - offset; + + xdr->len += len; +} + /* * Realign the iovec if the server missed out some reply elements * (such as post-op attributes,...) @@ -132,19 +174,153 @@ } /* - * Zero the last n bytes in an iovec array of 'nr' elements + * Map a struct xdr_buf into an iovec array. 
*/ -void xdr_zero_iovec(struct iovec *iov, int nr, size_t n) +int xdr_kmap(struct iovec *iov_base, struct xdr_buf *xdr, unsigned int base) { - struct iovec *pvec; + struct iovec *iov = iov_base; + struct page **ppage = xdr->pages; + unsigned int len, pglen = xdr->page_len; + + len = xdr->head[0].iov_len; + if (base < len) { + iov->iov_len = len - base; + iov->iov_base = (char *)xdr->head[0].iov_base + base; + iov++; + base = 0; + } else + base -= len; + + if (pglen == 0) + goto map_tail; + if (base >= pglen) { + base -= pglen; + goto map_tail; + } + if (base || xdr->page_base) { + pglen -= base; + base += xdr->page_base; + ppage += base >> PAGE_CACHE_SHIFT; + base &= ~PAGE_CACHE_MASK; + } + do { + len = PAGE_CACHE_SIZE; + iov->iov_base = kmap(*ppage); + if (base) { + iov->iov_base += base; + len -= base; + base = 0; + } + if (pglen < len) + len = pglen; + iov->iov_len = len; + iov++; + ppage++; + } while ((pglen -= len) != 0); +map_tail: + if (xdr->tail[0].iov_len) { + iov->iov_len = xdr->tail[0].iov_len - base; + iov->iov_base = (char *)xdr->tail[0].iov_base + base; + iov++; + } + return (iov - iov_base); +} - for (pvec = iov + nr - 1; n && nr > 0; nr--, pvec--) { - if (n < pvec->iov_len) { - memset((char *)pvec->iov_base + pvec->iov_len - n, 0, n); - n = 0; +void xdr_kunmap(struct xdr_buf *xdr, unsigned int base) +{ + struct page **ppage = xdr->pages; + unsigned int pglen = xdr->page_len; + + if (!pglen) + return; + if (base > xdr->head[0].iov_len) + base -= xdr->head[0].iov_len; + else + base = 0; + + if (base >= pglen) + return; + if (base || xdr->page_base) { + pglen -= base; + base += xdr->page_base; + ppage += base >> PAGE_CACHE_SHIFT; + pglen += base & ~PAGE_CACHE_MASK; + } + for (;;) { + flush_dcache_page(*ppage); + kunmap(*ppage); + if (pglen <= PAGE_CACHE_SIZE) + break; + pglen -= PAGE_CACHE_SIZE; + ppage++; + } +} + +void +xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, + skb_reader_t *desc, + skb_read_actor_t copy_actor) +{ + struct page **ppage = xdr->pages; + unsigned int len, pglen = xdr->page_len; + int ret; + + len = xdr->head[0].iov_len; + if (base < len) { + len -= base; + ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); + if (ret != len || !desc->count) + return; + base = 0; + } else + base -= len; + + if (pglen == 0) + goto copy_tail; + if (base >= pglen) { + base -= pglen; + goto copy_tail; + } + if (base || xdr->page_base) { + pglen -= base; + base += xdr->page_base; + ppage += base >> PAGE_CACHE_SHIFT; + base &= ~PAGE_CACHE_MASK; + } + do { + char *kaddr; + + len = PAGE_CACHE_SIZE; + kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); + if (base) { + len -= base; + if (pglen < len) + len = pglen; + ret = copy_actor(desc, kaddr + base, len); + base = 0; } else { - memset(pvec->iov_base, 0, pvec->iov_len); - n -= pvec->iov_len; + if (pglen < len) + len = pglen; + ret = copy_actor(desc, kaddr, len); } - } + kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); + if (ret != len || !desc->count) + return; + ppage++; + } while ((pglen -= len) != 0); +copy_tail: + len = xdr->tail[0].iov_len; + if (len) + copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len); +} + +void +xdr_shift_buf(struct xdr_buf *xdr, size_t len) +{ + struct iovec iov[MAX_IOVEC]; + unsigned int nr; + + nr = xdr_kmap(iov, xdr, 0); + xdr_shift_iovec(iov, nr, len); + xdr_kunmap(xdr, 0); } diff -u --recursive --new-file linux-2.4.19/net/sunrpc/xprt.c linux-2.4.19-17-rpcbuf/net/sunrpc/xprt.c --- linux-2.4.19/net/sunrpc/xprt.c Sat Aug 3 02:39:46 2002 +++ 
linux-2.4.19-17-rpcbuf/net/sunrpc/xprt.c Sat Oct 5 03:53:49 2002 @@ -67,8 +67,6 @@ #include -extern spinlock_t rpc_queue_lock; - /* * Local variables */ @@ -78,16 +76,19 @@ # define RPCDBG_FACILITY RPCDBG_XPRT #endif +#define XPRT_MAX_BACKOFF (8) + /* * Local functions */ static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static void do_xprt_transmit(struct rpc_task *); -static void xprt_reserve_status(struct rpc_task *task); +static inline void do_xprt_reserve(struct rpc_task *); static void xprt_disconnect(struct rpc_xprt *); static void xprt_reconn_status(struct rpc_task *task); static struct socket *xprt_create_socket(int, struct rpc_timeout *); static int xprt_bind_socket(struct rpc_xprt *, struct socket *); +static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); #ifdef RPC_DEBUG_DATA /* @@ -130,75 +131,74 @@ } /* - * Adjust the iovec to move on 'n' bytes - */ - -extern inline void -xprt_move_iov(struct msghdr *msg, struct iovec *niv, unsigned amount) -{ - struct iovec *iv=msg->msg_iov; - int i; - - /* - * Eat any sent iovecs - */ - while (iv->iov_len <= amount) { - amount -= iv->iov_len; - iv++; - msg->msg_iovlen--; - } - - /* - * And chew down the partial one - */ - niv[0].iov_len = iv->iov_len-amount; - niv[0].iov_base =((unsigned char *)iv->iov_base)+amount; - iv++; - - /* - * And copy any others - */ - for(i = 1; i < msg->msg_iovlen; i++) - niv[i]=*iv++; - - msg->msg_iov=niv; -} - -/* * Serialize write access to sockets, in order to prevent different * requests from interfering with each other. * Also prevents TCP socket reconnections from colliding with writes. */ static int -xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) +__xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) { - int retval; - spin_lock_bh(&xprt->sock_lock); - if (!xprt->snd_task) - xprt->snd_task = task; - else if (xprt->snd_task != task) { - dprintk("RPC: %4d TCP write queue full (task %d)\n", - task->tk_pid, xprt->snd_task->tk_pid); + if (!xprt->snd_task) { + if (xprt->nocong || __xprt_get_cong(xprt, task)) + xprt->snd_task = task; + } + if (xprt->snd_task != task) { + dprintk("RPC: %4d TCP write queue full\n", task->tk_pid); task->tk_timeout = 0; task->tk_status = -EAGAIN; - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + if (task->tk_rqstp->rq_nresend) + rpc_sleep_on(&xprt->resend, task, NULL, NULL); + else + rpc_sleep_on(&xprt->sending, task, NULL, NULL); } - retval = xprt->snd_task == task; + return xprt->snd_task == task; +} + +static inline int +xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) +{ + int retval; + spin_lock_bh(&xprt->sock_lock); + retval = __xprt_lock_write(xprt, task); spin_unlock_bh(&xprt->sock_lock); return retval; } +static void +__xprt_lock_write_next(struct rpc_xprt *xprt) +{ + struct rpc_task *task; + + if (xprt->snd_task) + return; + task = rpc_wake_up_next(&xprt->resend); + if (!task) { + if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) + return; + task = rpc_wake_up_next(&xprt->sending); + if (!task) + return; + } + if (xprt->nocong || __xprt_get_cong(xprt, task)) + xprt->snd_task = task; +} + /* * Releases the socket for use by other requests. 
*/ static void +__xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) +{ + if (xprt->snd_task == task) + xprt->snd_task = NULL; + __xprt_lock_write_next(xprt); +} + +static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) { spin_lock_bh(&xprt->sock_lock); - if (xprt->snd_task == task) { - xprt->snd_task = NULL; - rpc_wake_up_next(&xprt->sending); - } + __xprt_release_write(xprt, task); spin_unlock_bh(&xprt->sock_lock); } @@ -210,13 +210,11 @@ { struct socket *sock = xprt->sock; struct msghdr msg; + struct xdr_buf *xdr = &req->rq_snd_buf; + struct iovec niv[MAX_IOVEC]; + unsigned int niov, slen, skip; mm_segment_t oldfs; int result; - int slen = req->rq_slen - req->rq_bytes_sent; - struct iovec niv[MAX_IOVEC]; - - if (slen <= 0) - return 0; if (!sock) return -ENOTCONN; @@ -225,22 +223,26 @@ req->rq_svec->iov_base, req->rq_svec->iov_len); + /* Dont repeat bytes */ + skip = req->rq_bytes_sent; + slen = xdr->len - skip; + niov = xdr_kmap(niv, xdr, skip); + msg.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL; - msg.msg_iov = req->rq_svec; - msg.msg_iovlen = req->rq_snr; + msg.msg_iov = niv; + msg.msg_iovlen = niov; msg.msg_name = (struct sockaddr *) &xprt->addr; msg.msg_namelen = sizeof(xprt->addr); msg.msg_control = NULL; msg.msg_controllen = 0; - /* Dont repeat bytes */ - if (req->rq_bytes_sent) - xprt_move_iov(&msg, niv, req->rq_bytes_sent); - oldfs = get_fs(); set_fs(get_ds()); + clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); result = sock_sendmsg(sock, &msg, slen); set_fs(oldfs); + xdr_kunmap(xdr, skip); + dprintk("RPC: xprt_sendmsg(%d) = %d\n", slen, result); if (result >= 0) @@ -251,10 +253,7 @@ /* When the server has died, an ICMP port unreachable message * prompts ECONNREFUSED. */ - break; case -EAGAIN: - if (test_bit(SOCK_NOSPACE, &sock->flags)) - result = -ENOMEM; break; case -ENOTCONN: case -EPIPE: @@ -269,6 +268,40 @@ } /* + * Van Jacobson congestion avoidance. Check if the congestion window + * overflowed. Put the task to sleep if this is the case. + */ +static int +__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + + if (req->rq_cong) + return 1; + dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n", + task->tk_pid, xprt->cong, xprt->cwnd); + if (RPCXPRT_CONGESTED(xprt)) + return 0; + req->rq_cong = 1; + xprt->cong += RPC_CWNDSCALE; + return 1; +} + +/* + * Adjust the congestion window, and wake up the next task + * that has been sleeping due to congestion + */ +static void +__xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) +{ + if (!req->rq_cong) + return; + req->rq_cong = 0; + xprt->cong -= RPC_CWNDSCALE; + __xprt_lock_write_next(xprt); +} + +/* * Adjust RPC congestion window * We use a time-smoothed congestion estimator to avoid heavy oscillation. */ @@ -277,40 +310,22 @@ { unsigned long cwnd; - if (xprt->nocong) - return; - /* - * Note: we're in a BH context - */ - spin_lock(&xprt->xprt_lock); cwnd = xprt->cwnd; - if (result >= 0) { - if (xprt->cong < cwnd || time_before(jiffies, xprt->congtime)) - goto out; + if (result >= 0 && cwnd <= xprt->cong) { /* The (cwnd >> 1) term makes sure * the result gets rounded properly. 
*/ cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; if (cwnd > RPC_MAXCWND) cwnd = RPC_MAXCWND; - else - pprintk("RPC: %lu %ld cwnd\n", jiffies, cwnd); - xprt->congtime = jiffies + ((cwnd * HZ) << 2) / RPC_CWNDSCALE; - dprintk("RPC: cong %08lx, cwnd was %08lx, now %08lx, " - "time %ld ms\n", xprt->cong, xprt->cwnd, cwnd, - (xprt->congtime-jiffies)*1000/HZ); + __xprt_lock_write_next(xprt); } else if (result == -ETIMEDOUT) { - if ((cwnd >>= 1) < RPC_CWNDSCALE) + cwnd >>= 1; + if (cwnd < RPC_CWNDSCALE) cwnd = RPC_CWNDSCALE; - xprt->congtime = jiffies + ((cwnd * HZ) << 3) / RPC_CWNDSCALE; - dprintk("RPC: cong %ld, cwnd was %ld, now %ld, " - "time %ld ms\n", xprt->cong, xprt->cwnd, cwnd, - (xprt->congtime-jiffies)*1000/HZ); - pprintk("RPC: %lu %ld cwnd\n", jiffies, cwnd); } - + dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", + xprt->cong, xprt->cwnd, cwnd); xprt->cwnd = cwnd; - out: - spin_unlock(&xprt->xprt_lock); } /* @@ -462,7 +477,7 @@ /* if the socket is already closing, delay 5 secs */ if ((1<state) & ~(TCP_SYN_SENT|TCP_SYN_RECV)) task->tk_timeout = 5*HZ; - rpc_sleep_on(&xprt->sending, task, xprt_reconn_status, NULL); + rpc_sleep_on(&xprt->pending, task, xprt_reconn_status, NULL); release_sock(inet); return; } @@ -498,30 +513,16 @@ static inline struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) { - struct rpc_task *head, *task; - struct rpc_rqst *req; - int safe = 0; + struct list_head *pos; + struct rpc_rqst *req = NULL; - spin_lock_bh(&rpc_queue_lock); - if ((head = xprt->pending.task) != NULL) { - task = head; - do { - if ((req = task->tk_rqstp) && req->rq_xid == xid) - goto out; - task = task->tk_next; - if (++safe > 100) { - printk("xprt_lookup_rqst: loop in Q!\n"); - goto out_bad; - } - } while (task != head); + list_for_each(pos, &xprt->recv) { + struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list); + if (entry->rq_xid == xid) { + req = entry; + break; + } } - dprintk("RPC: unknown XID %08x in reply.\n", xid); - out_bad: - req = NULL; - out: - if (req && !__rpc_lock_task(req->rq_task)) - req = NULL; - spin_unlock_bh(&rpc_queue_lock); return req; } @@ -529,13 +530,23 @@ * Complete reply received. * The TCP code relies on us to remove the request from xprt->pending. */ -static inline void +static void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) { struct rpc_task *task = req->rq_task; + struct rpc_clnt *clnt = task->tk_client; /* Adjust congestion window */ - xprt_adjust_cwnd(xprt, copied); + if (!xprt->nocong) { + xprt_adjust_cwnd(xprt, copied); + __xprt_put_cong(xprt, req); + if (!req->rq_nresend) { + int timer = rpcproc_timer(clnt, task->tk_msg.rpc_proc); + if (timer) + rpc_update_rtt(&clnt->cl_rtt, timer, (long)jiffies - req->rq_xtime); + } + rpc_clear_timeo(&clnt->cl_rtt); + } #ifdef RPC_PROFILE /* Profile only reads for now */ @@ -557,66 +568,68 @@ #endif dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied); - task->tk_status = copied; - req->rq_received = 1; + req->rq_received = copied; + list_del_init(&req->rq_list); /* ... and wake up the process. 
*/ rpc_wake_up_task(task); return; } +static size_t +skb_read_bits(skb_reader_t *desc, void *to, size_t len) +{ + if (len > desc->count) + len = desc->count; + skb_copy_bits(desc->skb, desc->offset, to, len); + desc->count -= len; + desc->offset += len; + return len; +} + +static size_t +skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) +{ + unsigned int csum2, pos; + + if (len > desc->count) + len = desc->count; + pos = desc->offset; + csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); + desc->csum = csum_block_add(desc->csum, csum2, pos); + desc->count -= len; + desc->offset += len; + return len; +} + /* * We have set things up such that we perform the checksum of the UDP * packet in parallel with the copies into the RPC client iovec. -DaveM */ -static int csum_partial_copy_to_page_cache(struct iovec *iov, - struct sk_buff *skb, - int copied) -{ - int offset = sizeof(struct udphdr); - __u8 *cur_ptr = iov->iov_base; - __kernel_size_t cur_len = iov->iov_len; - unsigned int csum = skb->csum; - int need_csum = (skb->ip_summed != CHECKSUM_UNNECESSARY); - int slack = skb->len - copied - sizeof(struct udphdr); - - if (need_csum) - csum = csum_partial(skb->data, sizeof(struct udphdr), csum); - while (copied > 0) { - if (cur_len) { - int to_move = cur_len; - if (to_move > copied) - to_move = copied; - if (need_csum) { - unsigned int csum2; - - csum2 = skb_copy_and_csum_bits(skb, offset, - cur_ptr, - to_move, 0); - csum = csum_block_add(csum, csum2, offset); - } else - skb_copy_bits(skb, offset, cur_ptr, to_move); - offset += to_move; - copied -= to_move; - cur_ptr += to_move; - cur_len -= to_move; - } - if (cur_len <= 0) { - iov++; - cur_len = iov->iov_len; - cur_ptr = iov->iov_base; - } - } - if (need_csum) { - if (slack > 0) { - unsigned int csum2; +static int +csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) +{ + skb_reader_t desc; - csum2 = skb_checksum(skb, offset, slack, 0); - csum = csum_block_add(csum, csum2, offset); - } - if ((unsigned short)csum_fold(csum)) - return -1; + desc.skb = skb; + desc.offset = sizeof(struct udphdr); + desc.count = skb->len - desc.offset; + + if (skb->ip_summed == CHECKSUM_UNNECESSARY) + goto no_checksum; + + desc.csum = csum_partial(skb->data, desc.offset, skb->csum); + xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits); + if (desc.offset != skb->len) { + unsigned int csum2; + csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); + desc.csum = csum_block_add(desc.csum, csum2, desc.offset); } + if ((unsigned short)csum_fold(desc.csum)) + return -1; + return 0; +no_checksum: + xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits); return 0; } @@ -654,9 +667,10 @@ } /* Look up and lock the request corresponding to the given XID */ + spin_lock(&xprt->sock_lock); rovr = xprt_lookup_rqst(xprt, *(u32 *) (skb->h.raw + sizeof(struct udphdr))); if (!rovr) - goto dropit; + goto out_unlock; task = rovr->rq_task; dprintk("RPC: %4d received reply\n", task->tk_pid); @@ -667,7 +681,7 @@ copied = repsize; /* Suck it into the iovec, verify checksum if not done by hw. */ - if (csum_partial_copy_to_page_cache(rovr->rq_rvec, skb, copied)) + if (csum_partial_copy_to_xdr(&rovr->rq_rcv_buf, skb)) goto out_unlock; /* Something worked... 
*/ @@ -676,8 +690,7 @@ xprt_complete_rqst(xprt, rovr, copied); out_unlock: - rpc_unlock_task(task); - + spin_unlock(&xprt->sock_lock); dropit: skb_free_datagram(sk, skb); out: @@ -685,12 +698,6 @@ wake_up_interruptible(sk->sleep); } -typedef struct { - struct sk_buff *skb; - unsigned offset; - size_t count; -} skb_reader_t; - /* * Copy from an skb into memory and shrink the skb. */ @@ -781,50 +788,43 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) { struct rpc_rqst *req; - struct iovec *iov; - char *p; - unsigned long skip; - size_t len, used; - int n; + struct xdr_buf *rcvbuf; + size_t len; /* Find and lock the request corresponding to this xid */ + spin_lock(&xprt->sock_lock); req = xprt_lookup_rqst(xprt, xprt->tcp_xid); if (!req) { xprt->tcp_flags &= ~XPRT_COPY_DATA; dprintk("RPC: XID %08x request not found!\n", xprt->tcp_xid); + spin_unlock(&xprt->sock_lock); return; } - skip = xprt->tcp_copied; - iov = req->rq_rvec; - for (n = req->rq_rnr; n != 0; n--, iov++) { - if (skip >= iov->iov_len) { - skip -= iov->iov_len; - continue; - } - p = iov->iov_base; - len = iov->iov_len; - if (skip) { - p += skip; - len -= skip; - skip = 0; - } - if (xprt->tcp_offset + len > xprt->tcp_reclen) - len = xprt->tcp_reclen - xprt->tcp_offset; - used = tcp_copy_data(desc, p, len); - xprt->tcp_copied += used; - xprt->tcp_offset += used; - if (used != len) - break; - if (xprt->tcp_copied == req->rq_rlen) { + + rcvbuf = &req->rq_rcv_buf; + len = desc->count; + if (len > xprt->tcp_reclen - xprt->tcp_offset) { + skb_reader_t my_desc; + + len = xprt->tcp_reclen - xprt->tcp_offset; + memcpy(&my_desc, desc, sizeof(my_desc)); + my_desc.count = len; + xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, + &my_desc, tcp_copy_data); + desc->count -= len; + desc->offset += len; + } else + xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, + desc, tcp_copy_data); + xprt->tcp_copied += len; + xprt->tcp_offset += len; + + if (xprt->tcp_copied == req->rq_rlen) + xprt->tcp_flags &= ~XPRT_COPY_DATA; + else if (xprt->tcp_offset == xprt->tcp_reclen) { + if (xprt->tcp_flags & XPRT_LAST_FRAG) xprt->tcp_flags &= ~XPRT_COPY_DATA; - break; - } - if (xprt->tcp_offset == xprt->tcp_reclen) { - if (xprt->tcp_flags & XPRT_LAST_FRAG) - xprt->tcp_flags &= ~XPRT_COPY_DATA; - break; - } } if (!(xprt->tcp_flags & XPRT_COPY_DATA)) { @@ -832,7 +832,7 @@ req->rq_task->tk_pid); xprt_complete_rqst(xprt, req, xprt->tcp_copied); } - rpc_unlock_task(req->rq_task); + spin_unlock(&xprt->sock_lock); tcp_check_recm(xprt); } @@ -932,7 +932,7 @@ xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; spin_lock(&xprt->sock_lock); - if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending) + if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending) rpc_wake_up_task(xprt->snd_task); spin_unlock(&xprt->sock_lock); break; @@ -967,19 +967,30 @@ if (!sock_writeable(sk)) return; - if (!xprt_test_and_set_wspace(xprt)) { - spin_lock(&xprt->sock_lock); - if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending) - rpc_wake_up_task(xprt->snd_task); - spin_unlock(&xprt->sock_lock); - } + if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) + return; - if (test_bit(SOCK_NOSPACE, &sock->flags)) { - if (sk->sleep && waitqueue_active(sk->sleep)) { - clear_bit(SOCK_NOSPACE, &sock->flags); - wake_up_interruptible(sk->sleep); - } - } + spin_lock_bh(&xprt->sock_lock); + if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending) + rpc_wake_up_task(xprt->snd_task); + spin_unlock_bh(&xprt->sock_lock); + if (sk->sleep && 
waitqueue_active(sk->sleep)) + wake_up_interruptible(sk->sleep); +} + +/* + * Exponential backoff for UDP retries + */ +static inline int +xprt_expbackoff(struct rpc_task *task, struct rpc_rqst *req) +{ + int backoff; + + req->rq_ntimeo++; + backoff = min(rpc_ntimeo(&task->tk_client->cl_rtt), XPRT_MAX_BACKOFF); + if (req->rq_ntimeo < (1 << backoff)) + return 1; + return 0; } /* @@ -989,16 +1000,31 @@ xprt_timer(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + + spin_lock(&xprt->sock_lock); + if (req->rq_received) + goto out; - if (req) - xprt_adjust_cwnd(task->tk_xprt, -ETIMEDOUT); + if (!xprt->nocong) { + if (xprt_expbackoff(task, req)) { + rpc_add_timer(task, xprt_timer); + goto out_unlock; + } + rpc_inc_timeo(&task->tk_client->cl_rtt); + xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT); + } + req->rq_nresend++; dprintk("RPC: %4d xprt_timer (%s request)\n", task->tk_pid, req ? "pending" : "backlogged"); task->tk_status = -ETIMEDOUT; +out: task->tk_timeout = 0; rpc_wake_up_task(task); +out_unlock: + spin_unlock(&xprt->sock_lock); } /* @@ -1034,37 +1060,35 @@ *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); } - if (!xprt_lock_write(xprt, task)) + spin_lock_bh(&xprt->sock_lock); + if (!__xprt_lock_write(xprt, task)) { + spin_unlock_bh(&xprt->sock_lock); return; + } + if (list_empty(&req->rq_list)) { + list_add_tail(&req->rq_list, &xprt->recv); + req->rq_received = 0; + } + spin_unlock_bh(&xprt->sock_lock); -#ifdef RPC_PROFILE - req->rq_xtime = jiffies; -#endif do_xprt_transmit(task); } static void do_xprt_transmit(struct rpc_task *task) { + struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; int status, retry = 0; - /* For fast networks/servers we have to put the request on - * the pending list now: - * Note that we don't want the task timing out during the - * call to xprt_sendmsg(), so we initially disable the timeout, - * and then reset it later... - */ - xprt_receive(task); - /* Continue transmitting the packet/record. We must be careful * to cope with writespace callbacks arriving _after_ we have * called xprt_sendmsg(). */ while (1) { - xprt_clear_wspace(xprt); + req->rq_xtime = jiffies; status = xprt_sendmsg(xprt, req); if (status < 0) @@ -1078,7 +1102,7 @@ } else { if (status >= req->rq_slen) goto out_receive; - status = -ENOMEM; + status = -EAGAIN; break; } @@ -1090,31 +1114,28 @@ if (retry++ > 50) break; } - rpc_unlock_task(task); /* Note: at this point, task->tk_sleeping has not yet been set, * hence there is no danger of the waking up task being put on * schedq, and being picked up by a parallel run of rpciod(). 
*/ - rpc_wake_up_task(task); - if (!RPC_IS_RUNNING(task)) - goto out_release; if (req->rq_received) goto out_release; task->tk_status = status; switch (status) { - case -ENOMEM: - /* Protect against (udp|tcp)_write_space */ - spin_lock_bh(&xprt->sock_lock); - if (!xprt_wspace(xprt)) { - task->tk_timeout = req->rq_timeout.to_current; - rpc_sleep_on(&xprt->sending, task, NULL, NULL); - } - spin_unlock_bh(&xprt->sock_lock); - return; case -EAGAIN: + if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { + /* Protect against races with xprt_write_space */ + spin_lock_bh(&xprt->sock_lock); + if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) { + task->tk_timeout = req->rq_timeout.to_current; + rpc_sleep_on(&xprt->pending, task, NULL, NULL); + } + spin_unlock_bh(&xprt->sock_lock); + return; + } /* Keep holding the socket if it is blocked */ rpc_delay(task, HZ>>4); return; @@ -1126,102 +1147,64 @@ if (xprt->stream) xprt_disconnect(xprt); req->rq_bytes_sent = 0; - goto out_release; } - + out_release: + xprt_release_write(xprt, task); + return; out_receive: dprintk("RPC: %4d xmit complete\n", task->tk_pid); /* Set the task's receive timeout value */ - task->tk_timeout = req->rq_timeout.to_current; - rpc_add_timer(task, xprt_timer); - rpc_unlock_task(task); - out_release: - xprt_release_write(xprt, task); -} - -/* - * Queue the task for a reply to our call. - * When the callback is invoked, the congestion window should have - * been updated already. - */ -void -xprt_receive(struct rpc_task *task) -{ - struct rpc_rqst *req = task->tk_rqstp; - struct rpc_xprt *xprt = req->rq_xprt; - - dprintk("RPC: %4d xprt_receive\n", task->tk_pid); - - req->rq_received = 0; - task->tk_timeout = 0; - rpc_sleep_locked(&xprt->pending, task, NULL, NULL); + if (!xprt->nocong) { + task->tk_timeout = rpc_calc_rto(&clnt->cl_rtt, + rpcproc_timer(clnt, task->tk_msg.rpc_proc)); + req->rq_ntimeo = 0; + if (task->tk_timeout > req->rq_timeout.to_maxval) + task->tk_timeout = req->rq_timeout.to_maxval; + } else + task->tk_timeout = req->rq_timeout.to_current; + spin_lock_bh(&xprt->sock_lock); + if (!req->rq_received) + rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); + __xprt_release_write(xprt, task); + spin_unlock_bh(&xprt->sock_lock); } /* * Reserve an RPC call slot. */ -int +void xprt_reserve(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - /* We already have an initialized request. 
*/ - if (task->tk_rqstp) - return 0; - - dprintk("RPC: %4d xprt_reserve cong = %ld cwnd = %ld\n", - task->tk_pid, xprt->cong, xprt->cwnd); - spin_lock_bh(&xprt->xprt_lock); - xprt_reserve_status(task); - if (task->tk_rqstp) { - task->tk_timeout = 0; - } else if (!task->tk_timeout) { - task->tk_status = -ENOBUFS; - } else { - dprintk("RPC: xprt_reserve waiting on backlog\n"); - task->tk_status = -EAGAIN; - rpc_sleep_on(&xprt->backlog, task, NULL, NULL); + task->tk_status = -EIO; + if (!xprt->shutdown) { + spin_lock(&xprt->xprt_lock); + do_xprt_reserve(task); + spin_unlock(&xprt->xprt_lock); } - spin_unlock_bh(&xprt->xprt_lock); - dprintk("RPC: %4d xprt_reserve returns %d\n", - task->tk_pid, task->tk_status); - return task->tk_status; } -/* - * Reservation callback - */ -static void -xprt_reserve_status(struct rpc_task *task) +static inline void +do_xprt_reserve(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - struct rpc_rqst *req; - if (xprt->shutdown) { - task->tk_status = -EIO; - } else if (task->tk_status < 0) { - /* NOP */ - } else if (task->tk_rqstp) { - /* We've already been given a request slot: NOP */ - } else { - if (RPCXPRT_CONGESTED(xprt) || !(req = xprt->free)) - goto out_nofree; - /* OK: There's room for us. Grab a free slot and bump - * congestion value */ - xprt->free = req->rq_next; - req->rq_next = NULL; - xprt->cong += RPC_CWNDSCALE; + task->tk_status = 0; + if (task->tk_rqstp) + return; + if (xprt->free) { + struct rpc_rqst *req = xprt->free; + xprt->free = req->rq_next; + req->rq_next = NULL; task->tk_rqstp = req; xprt_request_init(task, xprt); - - if (xprt->free) - xprt_clear_backlog(xprt); + return; } - - return; - -out_nofree: + dprintk("RPC: waiting for request slot\n"); task->tk_status = -EAGAIN; + task->tk_timeout = 0; + rpc_sleep_on(&xprt->backlog, task, NULL, NULL); } /* @@ -1237,13 +1220,13 @@ xid = CURRENT_TIME << 12; dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, req, xid); - task->tk_status = 0; req->rq_timeout = xprt->timeout; req->rq_task = task; req->rq_xprt = xprt; req->rq_xid = xid++; if (!xid) xid++; + INIT_LIST_HEAD(&req->rq_list); } /* @@ -1255,27 +1238,25 @@ struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req; - if (xprt->snd_task == task) { - if (xprt->stream) - xprt_disconnect(xprt); - xprt_release_write(xprt, task); - } if (!(req = task->tk_rqstp)) return; + spin_lock_bh(&xprt->sock_lock); + __xprt_release_write(xprt, task); + __xprt_put_cong(xprt, req); + if (!list_empty(&req->rq_list)) + list_del(&req->rq_list); + spin_unlock_bh(&xprt->sock_lock); task->tk_rqstp = NULL; memset(req, 0, sizeof(*req)); /* mark unused */ dprintk("RPC: %4d release request %p\n", task->tk_pid, req); - spin_lock_bh(&xprt->xprt_lock); + spin_lock(&xprt->xprt_lock); req->rq_next = xprt->free; xprt->free = req; - /* Decrease congestion value. 
*/ - xprt->cong -= RPC_CWNDSCALE; - xprt_clear_backlog(xprt); - spin_unlock_bh(&xprt->xprt_lock); + spin_unlock(&xprt->xprt_lock); } /* @@ -1300,7 +1281,6 @@ to->to_initval = to->to_increment = incr; to->to_maxval = incr * retr; - to->to_resrvval = incr * retr; to->to_retries = retr; to->to_exponential = 0; } @@ -1331,21 +1311,22 @@ xprt->nocong = 1; } else xprt->cwnd = RPC_INITCWND; - xprt->congtime = jiffies; spin_lock_init(&xprt->sock_lock); spin_lock_init(&xprt->xprt_lock); init_waitqueue_head(&xprt->cong_wait); + INIT_LIST_HEAD(&xprt->recv); + /* Set timeout parameters */ if (to) { xprt->timeout = *to; xprt->timeout.to_current = to->to_initval; - xprt->timeout.to_resrvval = to->to_maxval << 1; } else xprt_default_timeout(&xprt->timeout, xprt->prot); xprt->pending = RPC_INIT_WAITQ("xprt_pending"); xprt->sending = RPC_INIT_WAITQ("xprt_sending"); + xprt->resend = RPC_INIT_WAITQ("xprt_resend"); xprt->backlog = RPC_INIT_WAITQ("xprt_backlog"); /* initialize free list */ @@ -1420,6 +1401,27 @@ } /* + * Set socket buffer length + */ +void +xprt_sock_setbufsize(struct rpc_xprt *xprt) +{ + struct sock *sk = xprt->inet; + + if (xprt->stream) + return; + if (xprt->rcvsize) { + sk->userlocks |= SOCK_RCVBUF_LOCK; + sk->rcvbuf = xprt->rcvsize * RPC_MAXCONG * 2; + } + if (xprt->sndsize) { + sk->userlocks |= SOCK_SNDBUF_LOCK; + sk->sndbuf = xprt->sndsize * RPC_MAXCONG * 2; + sk->write_space(sk); + } +} + +/* * Create a client socket given the protocol and peer address. */ static struct socket * @@ -1477,6 +1479,7 @@ { xprt->shutdown = 1; rpc_wake_up(&xprt->sending); + rpc_wake_up(&xprt->resend); rpc_wake_up(&xprt->pending); rpc_wake_up(&xprt->backlog); if (waitqueue_active(&xprt->cong_wait)) @@ -1488,8 +1491,6 @@ */ int xprt_clear_backlog(struct rpc_xprt *xprt) { - if (RPCXPRT_CONGESTED(xprt)) - return 0; rpc_wake_up_next(&xprt->backlog); if (waitqueue_active(&xprt->cong_wait)) wake_up(&xprt->cong_wait);
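
Note on the new receive-buffer layout (illustrative only, not part of the patch): the diff above replaces the fixed rpc_iov/MAX_IOVEC scheme with struct xdr_buf, which is a head iovec for the RPC header, an array of pages for the bulk payload (with page_base/page_len), and a tail iovec for XDR padding. xdr_kmap() and xdr_partial_copy_from_skb() both walk the buffer in exactly that order. The following user-space sketch models that walk; the struct names, PAGE_SIZE value and xdr_flatten() helper are assumptions made for the example, not kernel interfaces.

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

struct iovec_model {
	void   *iov_base;
	size_t  iov_len;
};

struct xdr_buf_model {
	struct iovec_model head;	/* RPC header, inline data     */
	char  **pages;			/* page-aligned bulk payload   */
	size_t  page_base;		/* offset into the first page  */
	size_t  page_len;		/* payload length              */
	struct iovec_model tail;	/* XDR padding, trailing data  */
};

/* Copy the whole buffer, segment by segment, into 'dst'. */
static size_t xdr_flatten(const struct xdr_buf_model *xdr, char *dst)
{
	size_t copied = 0, left = xdr->page_len;
	size_t base = xdr->page_base;
	char **page = xdr->pages;

	memcpy(dst + copied, xdr->head.iov_base, xdr->head.iov_len);
	copied += xdr->head.iov_len;

	page += base / PAGE_SIZE;	/* skip whole pages           */
	base %= PAGE_SIZE;		/* offset within current page */
	while (left) {
		size_t n = PAGE_SIZE - base;
		if (n > left)
			n = left;
		memcpy(dst + copied, *page + base, n);
		copied += n;
		left -= n;
		base = 0;
		page++;
	}

	memcpy(dst + copied, xdr->tail.iov_base, xdr->tail.iov_len);
	return copied + xdr->tail.iov_len;
}

int main(void)
{
	static char page0[PAGE_SIZE], page1[PAGE_SIZE];
	char *pages[] = { page0, page1 };
	char head[] = "HDR ", tail[] = " PAD";
	char out[2 * PAGE_SIZE + 16];
	struct xdr_buf_model xdr = {
		.head = { head, 4 },
		.pages = pages, .page_base = 10, .page_len = 20,
		.tail = { tail, 4 },
	};

	memset(page0, 'x', sizeof(page0));
	printf("flattened %zu bytes\n", xdr_flatten(&xdr, out));
	return 0;
}
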
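Note on the retransmit timer (illustrative only, not part of the patch): the new net/sunrpc/timer.c keeps a Van Jacobson estimator per class of RPC, where srtt holds eight times the smoothed round-trip time, sdrtt holds four times the mean deviation, and rpc_calc_rto() returns srtt/8 + sdrtt (the "A + 4D" rule in the comment), clamped to RPC_RTO_MAX. The sketch below mirrors that arithmetic in user space; the per-operation-class arrays are collapsed into a single estimator, and the constants and initial values are assumptions chosen for the demo.

#include <stdio.h>

#define RTO_MAX  6000	/* assumed: 60s at HZ=100 */
#define RTO_MIN  2	/* lower bound on the deviation term */

struct rtt_est {
	long srtt;	/* 8 * smoothed RTT   */
	long sdrtt;	/* 4 * mean deviation */
};

static void rtt_update(struct rtt_est *rt, long sample)
{
	long m = sample;

	if (m == 0)
		m = 1;
	m -= rt->srtt >> 3;	/* error against current estimate      */
	rt->srtt += m;		/* srtt <- srtt + error/8 (kept scaled) */
	if (m < 0)
		m = -m;
	m -= rt->sdrtt >> 2;
	rt->sdrtt += m;		/* sdrtt <- sdrtt + (|error| - mdev)/4  */
	if (rt->sdrtt < RTO_MIN)
		rt->sdrtt = RTO_MIN;
}

static long rtt_rto(const struct rtt_est *rt)
{
	long rto = (rt->srtt >> 3) + rt->sdrtt;	/* A + 4D */
	return rto > RTO_MAX ? RTO_MAX : rto;
}

int main(void)
{
	struct rtt_est rt = { .srtt = 20 << 3, .sdrtt = 20 };
	long samples[] = { 18, 22, 19, 80, 21, 20 };	/* one outlier */
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		rtt_update(&rt, samples[i]);
		printf("sample %3ld -> rto %3ld ticks\n", samples[i], rtt_rto(&rt));
	}
	return 0;
}

A single slow reply widens the deviation term and therefore the timeout, while a run of steady samples pulls the timeout back toward the smoothed RTT, which is why the patch can retire the old fixed to_resrvval reserve timeout.
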
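Note on the congestion window (illustrative only, not part of the patch): __xprt_get_cong() charges each request in flight RPC_CWNDSCALE units of xprt->cong and refuses new slots once cong reaches cwnd; xprt_adjust_cwnd() grows cwnd by roughly one request per full window on a good reply and halves it on a timeout. The toy below reproduces that fixed-point arithmetic in user space; the function names and the single-threaded driver in main() are assumptions for the example only.

#include <stdio.h>

#define CWNDSCALE 256
#define MAXCONG   16
#define MAXCWND   (MAXCONG * CWNDSCALE)

static unsigned long cwnd = CWNDSCALE;	/* start at one request        */
static unsigned long cong;		/* requests in flight, scaled  */

static int get_cong_slot(void)
{
	if (cong + CWNDSCALE > cwnd)
		return 0;		/* window full: caller must wait */
	cong += CWNDSCALE;
	return 1;
}

static void put_cong_slot(void)
{
	cong -= CWNDSCALE;
}

static void adjust_cwnd(int timed_out)
{
	if (!timed_out && cwnd <= cong) {
		/* additive increase, rounded: about +1 request per window */
		cwnd += (CWNDSCALE * CWNDSCALE + (cwnd >> 1)) / cwnd;
		if (cwnd > MAXCWND)
			cwnd = MAXCWND;
	} else if (timed_out) {
		cwnd >>= 1;		/* multiplicative decrease */
		if (cwnd < CWNDSCALE)
			cwnd = CWNDSCALE;
	}
}

int main(void)
{
	int i;

	for (i = 0; i < 10; i++) {
		if (get_cong_slot()) {
			adjust_cwnd(0);		/* pretend the reply arrived */
			put_cong_slot();
		}
		printf("round %2d: cwnd = %2lu requests\n", i, cwnd / CWNDSCALE);
	}
	adjust_cwnd(1);
	printf("after timeout: cwnd = %lu requests\n", cwnd / CWNDSCALE);
	return 0;
}

Because growth only happens when cwnd <= cong, the window expands only while it is actually saturated, which is also why the patch moves the cong accounting out of slot reservation (xprt_reserve) and into the write lock path (__xprt_get_cong/__xprt_put_cong).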