diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/fs/Config.in linux-2.4.0-test6-rpctcp/fs/Config.in --- linux-2.4.0-test6-xprttimer/fs/Config.in Thu Jul 6 07:15:27 2000 +++ linux-2.4.0-test6-rpctcp/fs/Config.in Thu Aug 3 17:32:58 2000 @@ -78,6 +78,7 @@ dep_tristate 'NFS server support' CONFIG_NFSD $CONFIG_INET dep_mbool ' Provide NFSv3 server support' CONFIG_NFSD_V3 $CONFIG_NFSD + dep_mbool ' Provide NFS server over TCP support (DEVELOPER-ONLY)' CONFIG_NFSD_TCP $CONFIG_NFSD $CONFIG_EXPERIMENTAL if [ "$CONFIG_NFS_FS" = "y" -o "$CONFIG_NFSD" = "y" ]; then define_tristate CONFIG_SUNRPC y diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/fs/lockd/host.c linux-2.4.0-test6-rpctcp/fs/lockd/host.c --- linux-2.4.0-test6-xprttimer/fs/lockd/host.c Wed Jun 21 21:43:38 2000 +++ linux-2.4.0-test6-rpctcp/fs/lockd/host.c Thu Aug 3 17:32:58 2000 @@ -51,7 +51,7 @@ struct nlm_host * nlmsvc_lookup_host(struct svc_rqst *rqstp) { - return nlm_lookup_host(rqstp->rq_client, &rqstp->rq_addr, 0, 0); + return nlm_lookup_host(rqstp->rq_client, rqstp->rq_name, 0, 0); } /* diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/fs/lockd/svc.c linux-2.4.0-test6-rpctcp/fs/lockd/svc.c --- linux-2.4.0-test6-xprttimer/fs/lockd/svc.c Sat Jun 24 06:12:53 2000 +++ linux-2.4.0-test6-rpctcp/fs/lockd/svc.c Thu Aug 3 17:32:58 2000 @@ -60,11 +60,13 @@ * This is the lockd kernel thread */ static void -lockd(struct svc_rqst *rqstp) +lockd(struct svc_thread *thread) { - struct svc_serv *serv = rqstp->rq_server; - int err = 0; + struct svc_serv *serv = thread->th_server; + struct svc_rqst *rqstp; unsigned long grace_period_expire; + int err = 0, + count = 0; /* Lock module and set up kernel thread */ MOD_INC_USE_COUNT; @@ -144,18 +146,19 @@ * Find a socket with data available and call its * recvfrom routine. 
*/ - if ((err = svc_recv(serv, rqstp, timeout)) == -EAGAIN) + rqstp = svc_recv(thread); + if (!rqstp) { + err = svc_sleep(thread, timeout); + if (err == -EINTR) + break; + count = 0; continue; - if (err < 0) { - if (err != -EINTR) - printk(KERN_WARNING - "lockd: terminating on error %d\n", - -err); - break; } + if (IS_ERR(rqstp)) + continue; dprintk("lockd: request from %08x\n", - (unsigned)ntohl(rqstp->rq_addr.sin_addr.s_addr)); + (unsigned)ntohl(rqstp->rq_name->sin_addr.s_addr)); /* * Look up the NFS client handle. The handle is needed for @@ -165,7 +168,7 @@ if (nlmsvc_ops) { nlmsvc_ops->exp_readlock(); rqstp->rq_client = - nlmsvc_ops->exp_getclient(&rqstp->rq_addr); + nlmsvc_ops->exp_getclient(rqstp->rq_name); } svc_process(serv, rqstp); @@ -173,6 +176,11 @@ /* Unlock export hash tables */ if (nlmsvc_ops) nlmsvc_ops->exp_unlock(); + + if (++count >= 200 || current->need_resched) { + schedule(); + count = 0; + } } /* @@ -188,7 +196,7 @@ wake_up(&lockd_exit); /* Exit the RPC thread */ - svc_exit_thread(rqstp); + svc_exit_thread(thread); /* release rpciod */ rpciod_down(); diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/fs/lockd/svc4proc.c linux-2.4.0-test6-rpctcp/fs/lockd/svc4proc.c --- linux-2.4.0-test6-xprttimer/fs/lockd/svc4proc.c Mon Apr 3 22:24:05 2000 +++ linux-2.4.0-test6-rpctcp/fs/lockd/svc4proc.c Thu Aug 3 17:32:58 2000 @@ -44,8 +44,8 @@ if (rqstp->rq_client == NULL) { printk(KERN_NOTICE "lockd: unauthenticated request from (%08x:%d)\n", - ntohl(rqstp->rq_addr.sin_addr.s_addr), - ntohs(rqstp->rq_addr.sin_port)); + ntohl(rqstp->rq_name->sin_addr.s_addr), + ntohs(rqstp->rq_name->sin_port)); return nlm_lck_denied_nolocks; } @@ -419,7 +419,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, void *resp) { - struct sockaddr_in saddr = rqstp->rq_addr; + struct sockaddr_in saddr = *rqstp->rq_name; struct nlm_host *host; dprintk("lockd: SM_NOTIFY called\n"); @@ -427,8 +427,8 @@ || ntohs(saddr.sin_port) >= 1024) { 
printk(KERN_WARNING "lockd: rejected NSM callback from %08x:%d\n", - ntohl(rqstp->rq_addr.sin_addr.s_addr), - ntohs(rqstp->rq_addr.sin_port)); + ntohl(rqstp->rq_name->sin_addr.s_addr), + ntohs(rqstp->rq_name->sin_port)); return rpc_system_err; } @@ -467,7 +467,7 @@ if (!(call = nlmclnt_alloc_call())) return rpc_system_err; - host = nlmclnt_lookup_host(&rqstp->rq_addr, + host = nlmclnt_lookup_host(rqstp->rq_name, rqstp->rq_prot, rqstp->rq_vers); if (!host) { kfree(call); diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/fs/lockd/svclock.c linux-2.4.0-test6-rpctcp/fs/lockd/svclock.c --- linux-2.4.0-test6-xprttimer/fs/lockd/svclock.c Thu Jun 29 23:06:47 2000 +++ linux-2.4.0-test6-rpctcp/fs/lockd/svclock.c Thu Aug 3 17:32:58 2000 @@ -161,7 +161,7 @@ struct nlm_rqst *call; /* Create host handle for callback */ - host = nlmclnt_lookup_host(&rqstp->rq_addr, + host = nlmclnt_lookup_host(rqstp->rq_name, rqstp->rq_prot, rqstp->rq_vers); if (host == NULL) return NULL; diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/fs/lockd/svcproc.c linux-2.4.0-test6-rpctcp/fs/lockd/svcproc.c --- linux-2.4.0-test6-xprttimer/fs/lockd/svcproc.c Mon Apr 3 22:24:06 2000 +++ linux-2.4.0-test6-rpctcp/fs/lockd/svcproc.c Thu Aug 3 17:32:58 2000 @@ -70,8 +70,8 @@ if (rqstp->rq_client == NULL) { printk(KERN_NOTICE "lockd: unauthenticated request from (%08x:%d)\n", - ntohl(rqstp->rq_addr.sin_addr.s_addr), - ntohs(rqstp->rq_addr.sin_port)); + ntohl(rqstp->rq_name->sin_addr.s_addr), + ntohs(rqstp->rq_name->sin_port)); return nlm_lck_denied_nolocks; } @@ -444,7 +444,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, void *resp) { - struct sockaddr_in saddr = rqstp->rq_addr; + struct sockaddr_in saddr = *rqstp->rq_name; struct nlm_host *host; dprintk("lockd: SM_NOTIFY called\n"); @@ -452,8 +452,8 @@ || ntohs(saddr.sin_port) >= 1024) { printk(KERN_WARNING "lockd: rejected NSM callback from %08x:%d\n", - ntohl(rqstp->rq_addr.sin_addr.s_addr), - 
ntohs(rqstp->rq_addr.sin_port)); + ntohl(rqstp->rq_name->sin_addr.s_addr), + ntohs(rqstp->rq_name->sin_port)); return rpc_system_err; } @@ -492,7 +492,7 @@ if (!(call = nlmclnt_alloc_call())) return rpc_system_err; - host = nlmclnt_lookup_host(&rqstp->rq_addr, + host = nlmclnt_lookup_host(rqstp->rq_name, rqstp->rq_prot, rqstp->rq_vers); if (!host) { kfree(call); diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/fs/nfsd/nfscache.c linux-2.4.0-test6-rpctcp/fs/nfsd/nfscache.c --- linux-2.4.0-test6-xprttimer/fs/nfsd/nfscache.c Thu Jul 6 07:15:27 2000 +++ linux-2.4.0-test6-rpctcp/fs/nfsd/nfscache.c Thu Aug 3 17:32:58 2000 @@ -192,7 +192,7 @@ xid == rp->c_xid && proc == rp->c_proc && proto == rp->c_prot && vers == rp->c_vers && time_before(jiffies, rp->c_timestamp + 120*HZ) && - memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, rqstp->rq_addrlen)==0) { + memcmp((char*)rqstp->rq_name, (char*)&rp->c_addr, rqstp->rq_namelen)==0) { nfsdstats.rchits++; goto found_entry; } @@ -229,7 +229,7 @@ rp->c_state = RC_INPROG; rp->c_xid = xid; rp->c_proc = proc; - rp->c_addr = rqstp->rq_addr; + memcpy(&rp->c_addr, rqstp->rq_name, sizeof(rp->c_addr)); rp->c_prot = proto; rp->c_vers = vers; rp->c_timestamp = jiffies; diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/fs/nfsd/nfsfh.c linux-2.4.0-test6-rpctcp/fs/nfsd/nfsfh.c --- linux-2.4.0-test6-xprttimer/fs/nfsd/nfsfh.c Sun Jul 9 04:26:13 2000 +++ linux-2.4.0-test6-rpctcp/fs/nfsd/nfsfh.c Thu Aug 3 17:32:58 2000 @@ -560,8 +560,8 @@ if (!rqstp->rq_secure && EX_SECURE(exp)) { printk(KERN_WARNING "nfsd: request from insecure port (%08x:%d)!\n", - ntohl(rqstp->rq_addr.sin_addr.s_addr), - ntohs(rqstp->rq_addr.sin_port)); + ntohl(rqstp->rq_name->sin_addr.s_addr), + ntohs(rqstp->rq_name->sin_port)); goto out; } diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/fs/nfsd/nfsproc.c linux-2.4.0-test6-rpctcp/fs/nfsd/nfsproc.c --- linux-2.4.0-test6-xprttimer/fs/nfsd/nfsproc.c Mon Jun 26 20:44:15 2000 +++ 
linux-2.4.0-test6-rpctcp/fs/nfsd/nfsproc.c Thu Aug 3 17:32:59 2000 @@ -143,8 +143,8 @@ if ((avail << 2) < argp->count) { printk(KERN_NOTICE "oversized read request from %08x:%d (%d bytes)\n", - ntohl(rqstp->rq_addr.sin_addr.s_addr), - ntohs(rqstp->rq_addr.sin_port), + ntohl(rqstp->rq_name->sin_addr.s_addr), + ntohs(rqstp->rq_name->sin_port), argp->count); argp->count = avail << 2; } diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/fs/nfsd/nfssvc.c linux-2.4.0-test6-rpctcp/fs/nfsd/nfssvc.c --- linux-2.4.0-test6-xprttimer/fs/nfsd/nfssvc.c Wed Jul 19 08:04:06 2000 +++ linux-2.4.0-test6-rpctcp/fs/nfsd/nfssvc.c Thu Aug 3 17:32:59 2000 @@ -36,11 +36,21 @@ #define NFSDDBG_FACILITY NFSDDBG_SVC #define NFSD_BUFSIZE (1024 + NFSSVC_MAXBLKSIZE) +#ifdef CONFIG_NFSD_V3 +#include +#ifndef MAX +# define MAX(a, b) (((a) > (b))? (a) : (b)) +#endif +#define NFSD_XDRSIZE MAX(NFSSVC_XDRSIZE,NFS3_SVC_XDRSIZE) +#else +#define NFSD_XDRSIZE NFSSVC_XDRSIZE +#endif + #define ALLOWED_SIGS (sigmask(SIGKILL)) #define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGQUIT)) extern struct svc_program nfsd_program; -static void nfsd(struct svc_rqst *rqstp); +static void nfsd(struct svc_thread *thread); struct timeval nfssvc_boot = { 0, 0 }; static struct svc_serv *nfsd_serv = NULL; static int nfsd_busy = 0; @@ -76,14 +86,14 @@ if (error<0) goto out; if (!nfsd_serv) { - nfsd_serv = svc_create(&nfsd_program, NFSD_BUFSIZE, NFSSVC_XDRSIZE); + nfsd_serv = svc_create(&nfsd_program, NFSD_BUFSIZE, NFSD_XDRSIZE); if (nfsd_serv == NULL) goto out; error = svc_makesock(nfsd_serv, IPPROTO_UDP, port); if (error < 0) goto failure; -#if 0 /* Don't even pretend that TCP works. It doesn't. 
*/ +#if CONFIG_NFSD_TCP error = svc_makesock(nfsd_serv, IPPROTO_TCP, port); if (error < 0) goto failure; @@ -140,11 +150,13 @@ * This is the NFS server kernel thread */ static void -nfsd(struct svc_rqst *rqstp) +nfsd(struct svc_thread *thread) { - struct svc_serv *serv = rqstp->rq_server; - int err; + struct svc_serv *serv = thread->th_server; + struct svc_rqst *rqstp; + long err; struct nfsd_list me; + int count = 0; /* Lock module and set up kernel thread */ MOD_INC_USE_COUNT; @@ -156,8 +168,6 @@ current->fs->umask = 0; nfsdstats.th_cnt++; - /* Let svc_process check client's authentication. */ - rqstp->rq_auth = 1; lockd_up(); /* start lockd */ @@ -178,11 +188,16 @@ * Find a socket with data available and call its * recvfrom routine. */ - while ((err = svc_recv(serv, rqstp, - MAX_SCHEDULE_TIMEOUT)) == -EAGAIN) - ; - if (err < 0) - break; + rqstp = svc_recv(thread); + if (!rqstp) { + err = svc_sleep(thread, MAX_SCHEDULE_TIMEOUT); + if (err == -EINTR) + break; + count = 0; + continue; + } + if (IS_ERR(rqstp)) + continue; update_thread_usage(nfsd_busy); nfsd_busy++; @@ -192,23 +207,29 @@ /* Validate the client's address. This will also defeat * port probes on port 2049 by unauthorized clients. */ - rqstp->rq_client = exp_getclient(&rqstp->rq_addr); + rqstp->rq_client = exp_getclient(rqstp->rq_name); /* Process request with signals blocked. */ spin_lock_irq(¤t->sigmask_lock); siginitsetinv(¤t->blocked, ALLOWED_SIGS); recalc_sigpending(current); spin_unlock_irq(¤t->sigmask_lock); + /* Let svc_process check client's authentication. 
*/ + rqstp->rq_auth = 1; svc_process(serv, rqstp); /* Unlock export hash tables */ exp_unlock(); update_thread_usage(nfsd_busy); nfsd_busy--; + if (++count >= 200 || current->need_resched) { + count = 0; + schedule(); + } } if (err != -EINTR) { - printk(KERN_WARNING "nfsd: terminating on error %d\n", -err); + printk(KERN_WARNING "nfsd: terminating on error %ld\n", -err); } else { unsigned int signo; @@ -231,7 +252,7 @@ nfsdstats.th_cnt --; /* Release the thread */ - svc_exit_thread(rqstp); + svc_exit_thread(thread); /* Release module */ MOD_DEC_USE_COUNT; diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/include/linux/sunrpc/svc.h linux-2.4.0-test6-rpctcp/include/linux/sunrpc/svc.h --- linux-2.4.0-test6-xprttimer/include/linux/sunrpc/svc.h Fri Jul 28 02:02:47 2000 +++ linux-2.4.0-test6-rpctcp/include/linux/sunrpc/svc.h Thu Aug 3 17:53:57 2000 @@ -11,6 +11,8 @@ #define SUNRPC_SVC_H #include +#include +#include #include #include #include @@ -26,12 +28,15 @@ * We currently do not support more than one RPC program per daemon. 
*/ struct svc_serv { - struct svc_rqst * sv_threads; /* idle server threads */ + struct svc_thread * sv_threads; /* idle server threads */ + struct svc_rqst * sv_requests; /* idle request buffers */ + struct svc_rqst * sv_sched; /* pending request buffers */ struct svc_sock * sv_sockets; /* pending sockets */ struct svc_program * sv_program; /* RPC program */ struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; unsigned int sv_nrthreads; /* # of server threads */ + unsigned int sv_nrrequests; /* # of request buffers */ unsigned int sv_bufsz; /* datagram buffer size */ unsigned int sv_xdrsize; /* XDR buffer size */ @@ -71,13 +76,13 @@ struct svc_buf { u32 * area; /* allocated memory */ u32 * base; /* base of RPC datagram */ - int buflen; /* total length of buffer */ + unsigned int buflen; /* total length of buffer */ u32 * buf; /* read/write pointer */ - int len; /* current end of buffer */ + unsigned int len; /* current end of buffer */ /* iovec for zero-copy NFS READs */ struct iovec iov[RPCSVC_MAXIOV]; - int nriov; + unsigned int nriov; }; #define svc_getlong(argp, val) { (val) = *(argp)->buf++; (argp)->len--; } #define svc_putlong(resp, val) { *(resp)->buf++ = (val); (resp)->len++; } @@ -87,18 +92,32 @@ * processed. * NOTE: First two items must be prev/next. 
*/ +struct svc_thread { + struct svc_thread * th_prev; /* Idle list */ + struct svc_thread * th_next; + + struct svc_serv * th_server; /* RPC service definition */ + volatile unsigned char th_queued : 1; /* Are we queued */ + + wait_queue_head_t th_wait; /* synchronization */ +}; + + struct svc_rqst { struct svc_rqst * rq_prev; /* idle list */ struct svc_rqst * rq_next; struct svc_sock * rq_sock; /* socket */ struct sockaddr_in rq_addr; /* peer address */ - int rq_addrlen; + struct sockaddr_in * rq_name; + int rq_namelen; + struct svc_thread * rq_thread; /* Backpointer to thread */ struct svc_serv * rq_server; /* RPC service definition */ struct svc_procedure * rq_procinfo; /* procedure info */ struct svc_cred rq_cred; /* auth info */ struct sk_buff * rq_skbuff; /* fast recv inet buffer */ - struct svc_buf rq_defbuf; /* default buffer */ + struct svc_buf rq_defabuf; /* default argument buffer */ + struct svc_buf rq_defrbuf; /* default result buffer */ struct svc_buf rq_argbuf; /* argument buffer */ struct svc_buf rq_resbuf; /* result buffer */ u32 rq_xid; /* transmission id */ @@ -109,7 +128,9 @@ unsigned short rq_verfed : 1, /* reply has verifier */ rq_userset : 1, /* auth->setuser OK */ rq_secure : 1, /* secure port */ - rq_auth : 1; /* check client */ + rq_auth : 1, /* check client */ + rq_processed : 1, /* for write space */ + rq_oneshot : 1; /* dispose after use */ void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ @@ -118,7 +139,6 @@ struct svc_client * rq_client; /* RPC peer info */ struct svc_cacherep * rq_cacherep; /* cache info */ - wait_queue_head_t rq_wait; /* synchronozation */ }; /* @@ -167,17 +187,87 @@ /* * This is the RPC server thread function prototype */ -typedef void (*svc_thread_fn)(struct svc_rqst *); +typedef void (*svc_thread_fn)(struct svc_thread *); /* * Function prototypes. 
*/ struct svc_serv * svc_create(struct svc_program *, unsigned int, unsigned int); int svc_create_thread(svc_thread_fn, struct svc_serv *); -void svc_exit_thread(struct svc_rqst *); +void svc_exit_thread(struct svc_thread *); +struct svc_rqst * svc_create_request(struct svc_serv *); +void svc_delete_request(struct svc_rqst *); void svc_destroy(struct svc_serv *); int svc_process(struct svc_serv *, struct svc_rqst *); int svc_register(struct svc_serv *, int, unsigned short); +long svc_sleep(struct svc_thread *, long); +void __svc_wake_up(struct svc_serv *); void svc_wake_up(struct svc_serv *); + +static inline void +__svc_pool_request(struct svc_serv *serv, struct svc_rqst *rqstp) +{ + rpc_insert_list(&serv->sv_requests, rqstp); +} + +static inline void +svc_pool_request(struct svc_serv *serv, struct svc_rqst *rqstp) +{ + spin_lock_bh(&serv->sv_lock); + __svc_pool_request(serv, rqstp); + spin_unlock_bh(&serv->sv_lock); +} + +static inline struct svc_rqst * +__svc_unpool_request(struct svc_serv *serv) +{ + struct svc_rqst *rqstp = serv->sv_requests; + if (rqstp) + rpc_remove_list(&serv->sv_requests, rqstp); + return rqstp; +} + +static inline struct svc_rqst * +svc_unpool_request(struct svc_serv *serv) +{ + struct svc_rqst *rqstp; + spin_lock_bh(&serv->sv_lock); + rqstp = __svc_unpool_request(serv); + spin_unlock_bh(&serv->sv_lock); + return rqstp; +} + +static inline void +__svc_schedule_request(struct svc_serv *serv, struct svc_rqst *rqstp) +{ + rpc_append_list(&serv->sv_sched, rqstp); +} + +static inline void +svc_schedule_request(struct svc_serv *serv, struct svc_rqst *rqstp) +{ + spin_lock_bh(&serv->sv_lock); + __svc_schedule_request(serv, rqstp); + spin_unlock_bh(&serv->sv_lock); +} + +static inline struct svc_rqst * +__svc_unsched_request(struct svc_serv *serv) +{ + struct svc_rqst *rqstp = serv->sv_sched; + if (rqstp) + rpc_remove_list(&serv->sv_sched, rqstp); + return rqstp; +} + +static inline struct svc_rqst * +svc_unsched_request(struct svc_serv *serv) +{ 
+ struct svc_rqst *rqstp; + spin_lock_bh(&serv->sv_lock); + rqstp = __svc_unsched_request(serv); + spin_unlock_bh(&serv->sv_lock); + return rqstp; +} #endif /* SUNRPC_SVC_H */ diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/include/linux/sunrpc/svcsock.h linux-2.4.0-test6-rpctcp/include/linux/sunrpc/svcsock.h --- linux-2.4.0-test6-xprttimer/include/linux/sunrpc/svcsock.h Fri Jul 28 02:02:57 2000 +++ linux-2.4.0-test6-rpctcp/include/linux/sunrpc/svcsock.h Thu Aug 3 17:53:57 2000 @@ -9,7 +9,9 @@ #ifndef SUNRPC_SVCSOCK_H #define SUNRPC_SVCSOCK_H +#include #include +#include /* * RPC server socket. @@ -21,30 +23,36 @@ struct svc_sock * sk_list; /* list of all sockets */ struct socket * sk_sock; /* berkeley socket layer */ struct sock * sk_sk; /* INET layer */ + struct sockaddr_in sk_addr; /* peer address */ + int sk_addrlen; spinlock_t sk_lock; + struct semaphore sk_sem; struct svc_serv * sk_server; /* service for this socket */ - unsigned char sk_inuse; /* use count */ - unsigned char sk_busy; /* enqueued/receiving */ - unsigned char sk_conn; /* conn pending */ - unsigned char sk_close; /* dead or dying */ + + struct timer_list sk_timer; /* kernel timer */ + unsigned long sk_timeout; /* Timeout for this socket */ + + int sk_inuse; /* use count */ + int sk_conn; /* conn pending */ int sk_data; /* data pending */ - unsigned int sk_temp : 1, /* temp socket */ + unsigned char sk_busy : 1, /* enqueued/receiving */ sk_qued : 1, /* on serv->sk_sockets */ + sk_temp : 1, /* temp socket */ sk_dead : 1; /* socket closed */ + volatile unsigned char sk_close: 1; /* dead or dying */ + int (*sk_recvfrom)(struct svc_rqst *rqstp); int (*sk_sendto)(struct svc_rqst *rqstp); /* We keep the old state_change and data_ready CB's here */ void (*sk_ostate)(struct sock *); - void (*sk_odata)(struct sock *, int bytes); + void (*sk_odata)(struct sock *, int); + void (*sk_owspace)(struct sock *); /* private TCP part */ - int sk_reclen; /* length of record */ - int sk_tcplen; /* current 
read length */ - - /* Debugging */ - struct svc_rqst * sk_rqstp; + u32 sk_reclen; /* length of record */ + u32 sk_tcplen; /* current read length */ }; /* @@ -52,7 +60,7 @@ */ int svc_makesock(struct svc_serv *, int, unsigned short); void svc_delete_socket(struct svc_sock *); -int svc_recv(struct svc_serv *, struct svc_rqst *, long); +struct svc_rqst* svc_recv(struct svc_thread *); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/net/sunrpc/sunrpc_syms.c linux-2.4.0-test6-rpctcp/net/sunrpc/sunrpc_syms.c --- linux-2.4.0-test6-xprttimer/net/sunrpc/sunrpc_syms.c Sat Apr 22 01:08:52 2000 +++ linux-2.4.0-test6-rpctcp/net/sunrpc/sunrpc_syms.c Thu Aug 3 17:32:59 2000 @@ -74,6 +74,7 @@ EXPORT_SYMBOL(svc_drop); EXPORT_SYMBOL(svc_process); EXPORT_SYMBOL(svc_recv); +EXPORT_SYMBOL(svc_sleep); EXPORT_SYMBOL(svc_wake_up); EXPORT_SYMBOL(svc_makesock); diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/net/sunrpc/svc.c linux-2.4.0-test6-rpctcp/net/sunrpc/svc.c --- linux-2.4.0-test6-xprttimer/net/sunrpc/svc.c Wed Jun 21 21:43:37 2000 +++ linux-2.4.0-test6-rpctcp/net/sunrpc/svc.c Thu Aug 3 17:32:59 2000 @@ -23,6 +23,8 @@ #define RPCDBG_FACILITY RPCDBG_SVCDSP #define RPC_PARANOIA 1 +#define SVC_SLACK_SPACE 512 /* Maximal RPC header size */ + /* * Create an RPC service */ @@ -43,7 +45,7 @@ serv->sv_program = prog; serv->sv_nrthreads = 1; serv->sv_stats = prog->pg_stats; - serv->sv_bufsz = bufsize? bufsize : 4096; + serv->sv_bufsz = bufsize? 
(bufsize + SVC_SLACK_SPACE) : 4096; serv->sv_xdrsize = xdrsize; spin_lock_init(&serv->sv_lock); @@ -61,6 +63,7 @@ void svc_destroy(struct svc_serv *serv) { + struct svc_rqst *rqstp; struct svc_sock *svsk; dprintk("RPC: svc_destroy(%s, %d)\n", @@ -76,6 +79,12 @@ while ((svsk = serv->sv_allsocks) != NULL) svc_delete_socket(svsk); + while (((rqstp = svc_unpool_request(serv)) != NULL) + || ((rqstp = svc_unsched_request(serv)) != NULL)) { + svc_drop(rqstp); + svc_delete_request(rqstp); + } + /* Unregister service with the portmapper */ svc_register(serv, 0, 0); kfree(serv); @@ -86,7 +95,7 @@ * Later versions may do nifty things by allocating multiple pages * of memory directly and putting them into the bufp->iov. */ -int +static inline int svc_init_buffer(struct svc_buf *bufp, unsigned int size) { if (!(bufp->area = (u32 *) kmalloc(size, GFP_KERNEL))) @@ -106,7 +115,7 @@ /* * Release an RPC server buffer */ -void +static inline void svc_release_buffer(struct svc_buf *bufp) { kfree(bufp->area); @@ -114,29 +123,77 @@ } /* + * Destroy an RPC server thread + */ +void +svc_delete_request(struct svc_rqst *rqstp) +{ + struct svc_serv *serv = rqstp->rq_server; + serv->sv_nrrequests--; + svc_release_buffer(&rqstp->rq_defabuf); + svc_release_buffer(&rqstp->rq_defrbuf); + if (rqstp->rq_resp) + kfree(rqstp->rq_resp); + if (rqstp->rq_argp) + kfree(rqstp->rq_argp); + kfree(rqstp); +} + +/* * Create a server thread */ -int -svc_create_thread(svc_thread_fn func, struct svc_serv *serv) +struct svc_rqst* +svc_create_request(struct svc_serv *serv) { - struct svc_rqst *rqstp; - int error = -ENOMEM; + struct svc_rqst *rqstp = NULL; rqstp = kmalloc(sizeof(*rqstp), GFP_KERNEL); if (!rqstp) goto out; - memset(rqstp, 0, sizeof(*rqstp)); - init_waitqueue_head(&rqstp->rq_wait); + rqstp->rq_server = serv; + rqstp->rq_name = &rqstp->rq_addr; + serv->sv_nrrequests++; if (!(rqstp->rq_argp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL)) || !(rqstp->rq_resp = (u32 *) kmalloc(serv->sv_xdrsize, 
GFP_KERNEL)) - || !svc_init_buffer(&rqstp->rq_defbuf, serv->sv_bufsz)) - goto out_thread; + || !svc_init_buffer(&rqstp->rq_defabuf, serv->sv_bufsz) + || !svc_init_buffer(&rqstp->rq_defrbuf, serv->sv_bufsz)) + goto out_err; + memcpy(&rqstp->rq_argbuf, &rqstp->rq_defabuf, sizeof(rqstp->rq_argbuf)); + memcpy(&rqstp->rq_resbuf, &rqstp->rq_defrbuf, sizeof(rqstp->rq_resbuf)); + out: + return rqstp; + out_err: + svc_delete_request(rqstp); + return NULL; +} + +/* + * Create a server thread + */ +int +svc_create_thread(svc_thread_fn func, struct svc_serv *serv) +{ + struct svc_thread *thread = NULL; + struct svc_rqst *rqstp = NULL; + int error = -ENOMEM; + thread = kmalloc(sizeof(*thread), GFP_KERNEL); + if (!thread) + goto out; + memset(thread, 0, sizeof(*thread)); + init_waitqueue_head(&thread->th_wait); + thread->th_server = serv; serv->sv_nrthreads++; - rqstp->rq_server = serv; - error = kernel_thread((int (*)(void *)) func, rqstp, 0); + + if (serv->sv_nrrequests < serv->sv_nrthreads) { + rqstp = svc_create_request(serv); + if (!rqstp) + goto out_thread; + svc_pool_request(serv, rqstp); + } + error = kernel_thread((int (*)(void *)) func, thread, 0); if (error < 0) goto out_thread; error = 0; @@ -144,7 +201,7 @@ return error; out_thread: - svc_exit_thread(rqstp); + svc_exit_thread(thread); goto out; } @@ -152,20 +209,113 @@ * Destroy an RPC server thread */ void -svc_exit_thread(struct svc_rqst *rqstp) +svc_exit_thread(struct svc_thread *thread) { - struct svc_serv *serv = rqstp->rq_server; - - svc_release_buffer(&rqstp->rq_defbuf); - if (rqstp->rq_resp) - kfree(rqstp->rq_resp); - if (rqstp->rq_argp) - kfree(rqstp->rq_argp); - kfree(rqstp); + struct svc_serv *serv = thread->th_server; + kfree(thread); /* Release the server */ if (serv) svc_destroy(serv); +} + +/* + * Dequeue an nfsd thread. Must have serv->sv_lock held. 
+ */ +static inline void +__svc_serv_dequeue(struct svc_serv *serv, struct svc_thread *thread) +{ + if (thread->th_queued) { + rpc_remove_list(&serv->sv_threads, thread); + thread->th_queued = 0; + } +} + +static void +svc_serv_dequeue(struct svc_serv *serv, struct svc_thread *thread) +{ + spin_lock_bh(&serv->sv_lock); + __svc_serv_dequeue(serv, thread); + spin_unlock_bh(&serv->sv_lock); +} + +/* + * Queue up an idle server thread. Must have serv->sv_lock held. + */ +static void +svc_serv_enqueue(struct svc_serv *serv, struct svc_thread *thread) +{ + spin_lock_bh(&serv->sv_lock); + if (serv->sv_sched != NULL) { + __svc_serv_dequeue(serv, thread); + goto out; + } + if (thread->th_queued) + goto out; + rpc_insert_list(&serv->sv_threads, thread); + thread->th_queued = 1; + out: + spin_unlock_bh(&serv->sv_lock); +} + + +long svc_sleep(struct svc_thread *thread, long timeout) +{ + struct svc_serv *serv = thread->th_server; + DECLARE_WAITQUEUE(wait, current); + long ret = timeout; + + + add_wait_queue(&thread->th_wait, &wait); + svc_serv_enqueue(serv, thread); + + /* + * We have to be able to interrupt this wait + * in order to bring down the daemons ... + */ + set_current_state(TASK_INTERRUPTIBLE); + + if (!thread->th_queued) + goto out; + + if (timeout <= 0) { + ret = -EAGAIN; + goto out; + } + if (!signalled()) + ret = schedule_timeout(timeout); + else + ret = -EINTR; + out: + svc_serv_dequeue(serv, thread); + remove_wait_queue(&thread->th_wait, &wait); + set_current_state(TASK_RUNNING); + + return ret; +} + +/* + * External function to wake up a server waiting for data + * Must be called with serv->sv_lock held... 
+ */ +void +__svc_wake_up(struct svc_serv *serv) +{ + struct svc_thread *thread; + + if ((thread = serv->sv_threads) != NULL) { + dprintk("svc: daemon %p woken up.\n", thread); + __svc_serv_dequeue(serv, thread); + wake_up(&thread->th_wait); + } +} + +void +svc_wake_up(struct svc_serv *serv) +{ + spin_lock_bh(&serv->sv_lock); + __svc_wake_up(serv); + spin_unlock_bh(&serv->sv_lock); } /* diff -u --recursive --new-file linux-2.4.0-test6-xprttimer/net/sunrpc/svcsock.c linux-2.4.0-test6-rpctcp/net/sunrpc/svcsock.c --- linux-2.4.0-test6-xprttimer/net/sunrpc/svcsock.c Sat Jul 8 00:57:49 2000 +++ linux-2.4.0-test6-rpctcp/net/sunrpc/svcsock.c Thu Aug 3 17:32:59 2000 @@ -51,46 +51,85 @@ #define RPCDBG_FACILITY RPCDBG_SVCSOCK +#define RPCSVC_TIMEOUT 360*HZ +#define RPCSVC_SEND_TIMEOUT 60*HZ + static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, int *errp, int pmap_reg); -static void svc_udp_data_ready(struct sock *, int); -static int svc_udp_recvfrom(struct svc_rqst *); -static int svc_udp_sendto(struct svc_rqst *); - +static void __svc_sock_enqueue(struct svc_sock *); /* - * Queue up an idle server thread. Must have serv->sv_lock held. + * Release an skbuff after use */ static inline void -svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp) +svc_release_skb(struct svc_rqst *rqstp) { - rpc_append_list(&serv->sv_threads, rqstp); + struct sk_buff *skb = rqstp->rq_skbuff; + + if (!skb) + return; + rqstp->rq_skbuff = NULL; + + dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); + skb_free_datagram(rqstp->rq_sock->sk_sk, skb); } +typedef void (*svc_sk_action)(struct svc_sock *); + /* - * Dequeue an nfsd thread. Must have serv->sv_lock held. 
+ * Mark a socket as being closed */ static inline void -svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp) +__svc_sock_close(struct svc_sock *svsk) { - rpc_remove_list(&serv->sv_threads, rqstp); + svsk->sk_close = 1; +} + +static inline void +svc_sock_close(struct svc_sock *svsk) +{ + spin_lock_bh(&svsk->sk_lock); + __svc_sock_close(svsk); + spin_unlock_bh(&svsk->sk_lock); } /* - * Release an skbuff after use + * Set the timeout value on a socket */ static inline void -svc_release_skb(struct svc_rqst *rqstp) +__svc_sock_add_timer(struct svc_sock *svsk, unsigned long timeout) { - struct sk_buff *skb = rqstp->rq_skbuff; - - if (!skb) + if ((svsk->sk_timeout = timeout) == 0) return; - rqstp->rq_skbuff = NULL; + mod_timer(&svsk->sk_timer, jiffies + svsk->sk_timeout); +} - dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); - skb_free_datagram(rqstp->rq_sock->sk_sk, skb); +static inline void +__svc_sock_disable_timer(struct svc_sock *svsk) +{ + svsk->sk_timeout = 0; +} + +static inline void +svc_sock_delete_timer(struct svc_sock *svsk) +{ + if (timer_pending(&svsk->sk_timer)) + del_timer_sync(&svsk->sk_timer); +} + +static void +svc_sock_timeout(struct svc_sock *svsk) +{ + dprintk("svc: socket timed out. Disconnecting...\n"); + + spin_lock_bh(&svsk->sk_lock); + if (svsk->sk_timeout) { + __svc_sock_disable_timer(svsk); + __svc_sock_close(svsk); + __svc_sock_enqueue(svsk); + } + spin_unlock_bh(&svsk->sk_lock); } /* @@ -100,22 +139,18 @@ * This must be called with svsk->sk_lock held. */ static void -svc_sock_enqueue(struct svc_sock *svsk) +__svc_sock_enqueue(struct svc_sock *svsk) { - struct svc_serv *serv = svsk->sk_server; - struct svc_rqst *rqstp; + struct svc_serv *serv = svsk->sk_server; + struct svc_rqst *rqstp; - /* NOTE: Local BH is already disabled by our caller. 
*/ - spin_lock(&serv->sv_lock); - if (serv->sv_threads && serv->sv_sockets) - printk(KERN_ERR - "svc_sock_enqueue: threads and sockets both waiting??\n"); + __svc_sock_disable_timer(svsk); if (svsk->sk_busy) { /* Don't enqueue socket while daemon is receiving */ dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk); - goto out_unlock; + return; } /* Mark socket as busy. It will remain in this state until the @@ -124,44 +159,47 @@ */ svsk->sk_busy = 1; - if ((rqstp = serv->sv_threads) != NULL) { - dprintk("svc: socket %p served by daemon %p\n", - svsk->sk_sk, rqstp); - svc_serv_dequeue(serv, rqstp); + /* NOTE: Local BH is already disabled by our caller. */ + spin_lock(&serv->sv_lock); + + if ((rqstp = __svc_unpool_request(serv)) != NULL) { if (rqstp->rq_sock) printk(KERN_ERR "svc_sock_enqueue: server %p, rq_sock=%p!\n", rqstp, rqstp->rq_sock); rqstp->rq_sock = svsk; svsk->sk_inuse++; - wake_up(&rqstp->rq_wait); - } else { - dprintk("svc: socket %p put into queue\n", svsk->sk_sk); - rpc_append_list(&serv->sv_sockets, svsk); - svsk->sk_qued = 1; + __svc_schedule_request(serv, rqstp); + __svc_wake_up(serv); + dprintk("svc: socket %p and request %p scheduled\n", svsk, rqstp); + goto out; } -out_unlock: + dprintk("svc: socket %p put into queue\n", svsk->sk_sk); + rpc_append_list(&serv->sv_sockets, svsk); + svsk->sk_qued = 1; + + out: spin_unlock(&serv->sv_lock); + return; } /* * Dequeue the first socket. Must be called with the serv->sv_lock held. 
*/ -static inline struct svc_sock * -svc_sock_dequeue(struct svc_serv *serv) +static struct svc_sock * +__svc_sock_dequeue(struct svc_serv *serv) { struct svc_sock *svsk; - if ((svsk = serv->sv_sockets) != NULL) + if ((svsk = serv->sv_sockets) != NULL) { rpc_remove_list(&serv->sv_sockets, svsk); - if (svsk) { dprintk("svc: socket %p dequeued, inuse=%d\n", svsk->sk_sk, svsk->sk_inuse); svsk->sk_qued = 0; + svsk->sk_inuse++; } - return svsk; } @@ -169,7 +207,7 @@ * Having read count bytes from a socket, check whether it * needs to be re-enqueued. */ -static inline void +static void svc_sock_received(struct svc_sock *svsk, int count) { spin_lock_bh(&svsk->sk_lock); @@ -177,12 +215,11 @@ printk(KERN_NOTICE "svc: sk_data negative!\n"); svsk->sk_data = 0; } - svsk->sk_rqstp = NULL; /* XXX */ svsk->sk_busy = 0; - if (svsk->sk_conn || svsk->sk_data || svsk->sk_close) { + if (svsk->sk_data || svsk->sk_close) { dprintk("svc: socket %p re-enqueued after receive\n", svsk->sk_sk); - svc_sock_enqueue(svsk); + __svc_sock_enqueue(svsk); } spin_unlock_bh(&svsk->sk_lock); } @@ -190,89 +227,91 @@ /* * Dequeue a new connection. */ -static inline void -svc_sock_accepted(struct svc_sock *svsk) +static void +svc_sock_accepted(struct svc_sock *svsk, int count) { spin_lock_bh(&svsk->sk_lock); - svsk->sk_busy = 0; - svsk->sk_conn--; - if (svsk->sk_conn || svsk->sk_data || svsk->sk_close) { - dprintk("svc: socket %p re-enqueued after accept\n", + svsk->sk_busy = 0; + if ((svsk->sk_conn -= count) < 0) { + printk(KERN_NOTICE "svc: sk_conn negative!\n"); + svsk->sk_conn = 0; + } + if (svsk->sk_conn || svsk->sk_close) { + dprintk("svc: socket %p re-enqueued after accept\n", svsk->sk_sk); - svc_sock_enqueue(svsk); - } + __svc_sock_enqueue(svsk); + } spin_unlock_bh(&svsk->sk_lock); } /* * Release a socket after use. 
*/ -static inline void -svc_sock_release(struct svc_rqst *rqstp) +static void +svc_sock_release(struct svc_sock *svsk) { - struct svc_sock *svsk = rqstp->rq_sock; - - if (!svsk) - return; - svc_release_skb(rqstp); - rqstp->rq_sock = NULL; - if (!--(svsk->sk_inuse) && svsk->sk_dead) { - dprintk("svc: releasing dead socket\n"); - sock_release(svsk->sk_sock); - kfree(svsk); + spin_lock_bh(&svsk->sk_lock); + --svsk->sk_inuse; + if (svsk->sk_close && !svsk->sk_inuse) + goto out_delete; + if (svsk->sk_conn || svsk->sk_data) { + __svc_sock_enqueue(svsk); + goto out; } + if (svsk->sk_temp) + __svc_sock_add_timer(svsk, RPCSVC_TIMEOUT); + out: + spin_unlock_bh(&svsk->sk_lock); + return; + out_delete: + spin_unlock_bh(&svsk->sk_lock); + dprintk("svc: releasing dead socket\n"); + svc_delete_socket(svsk); } /* - * External function to wake up a server waiting for data + * Count buffer length when someone throws us an array of iovec. */ -void -svc_wake_up(struct svc_serv *serv) +static inline unsigned int +svc_iovlen(struct iovec *iov, unsigned int nr) { - struct svc_rqst *rqstp; + unsigned buflen; - spin_lock_bh(&serv->sv_lock); - if ((rqstp = serv->sv_threads) != NULL) { - dprintk("svc: daemon %p woken up.\n", rqstp); - /* - svc_serv_dequeue(serv, rqstp); - rqstp->rq_sock = NULL; - */ - wake_up(&rqstp->rq_wait); - } - spin_unlock_bh(&serv->sv_lock); + for (buflen = 0; nr; iov++, nr--) + buflen += iov->iov_len; + return buflen; } /* * Generic sendto routine */ static int -svc_sendto(struct svc_rqst *rqstp, struct iovec *iov, int nr) +svc_sendto(struct svc_sock *svsk, struct sockaddr_in *to, int tolen, + struct iovec *iov, unsigned nr, unsigned buflen, int flags) { mm_segment_t oldfs; - struct svc_sock *svsk = rqstp->rq_sock; struct socket *sock = svsk->sk_sock; struct msghdr msg; - int i, buflen, len; + int len; - for (i = buflen = 0; i < nr; i++) - buflen += iov[i].iov_len; + if (svsk->sk_close) + return -ENOTCONN; - msg.msg_name = &rqstp->rq_addr; - msg.msg_namelen = 
sizeof(rqstp->rq_addr); + msg.msg_name = to; + msg.msg_namelen = tolen; msg.msg_iov = iov; msg.msg_iovlen = nr; msg.msg_control = NULL; msg.msg_controllen = 0; - msg.msg_flags = MSG_DONTWAIT; + msg.msg_flags = flags; - oldfs = get_fs(); set_fs(KERNEL_DS); + oldfs = get_fs(); set_fs(get_ds()); len = sock_sendmsg(sock, &msg, buflen); set_fs(oldfs); dprintk("svc: socket %p sendto([%p %Zu... ], %d, %d) = %d\n", - rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, nr, buflen, len); + svsk, iov[0].iov_base, iov[0].iov_len, nr, buflen, len); return len; } @@ -283,14 +322,13 @@ static int svc_recv_available(struct svc_sock *svsk) { - mm_segment_t oldfs; struct socket *sock = svsk->sk_sock; + mm_segment_t oldfs; int avail, err; - oldfs = get_fs(); set_fs(KERNEL_DS); - err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail); + oldfs = get_fs(); set_fs(get_ds()); + err = sock->ops->ioctl(sock, SIOCINQ, (unsigned long) &avail); set_fs(oldfs); - return (err >= 0)? avail : err; } @@ -298,38 +336,26 @@ * Generic recvfrom routine. */ static int -svc_recvfrom(struct svc_rqst *rqstp, struct iovec *iov, int nr, int buflen) +svc_recvfrom(struct svc_sock *svsk, struct iovec *iov, int nr, int buflen, int flags) { mm_segment_t oldfs; + struct socket *sock = svsk->sk_sock; struct msghdr msg; - struct socket *sock; - int len, alen; - - rqstp->rq_addrlen = sizeof(rqstp->rq_addr); - sock = rqstp->rq_sock->sk_sock; + int len; - msg.msg_name = &rqstp->rq_addr; - msg.msg_namelen = sizeof(rqstp->rq_addr); + msg.msg_name = NULL; + msg.msg_namelen = 0; msg.msg_iov = iov; msg.msg_iovlen = nr; msg.msg_control = NULL; msg.msg_controllen = 0; + msg.msg_flags = 0; - msg.msg_flags = MSG_DONTWAIT; - - oldfs = get_fs(); set_fs(KERNEL_DS); - len = sock_recvmsg(sock, &msg, buflen, MSG_DONTWAIT); + oldfs = get_fs(); set_fs(get_ds()); + len = sock_recvmsg(sock, &msg, buflen, flags); set_fs(oldfs); - - /* sock_recvmsg doesn't fill in the name/namelen, so we must.. 
- * possibly we should cache this in the svc_sock structure - * at accept time. FIXME - */ - alen = sizeof(rqstp->rq_addr); - sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1); - dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", - rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len); + svsk, iov[0].iov_base, iov[0].iov_len, len); return len; } @@ -344,11 +370,11 @@ if (!svsk) goto out; - dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", + dprintk("svc: UDP data ready, socket %p(inet %p), count=%d, busy=%d\n", svsk, sk, count, svsk->sk_busy); spin_lock_bh(&svsk->sk_lock); - svsk->sk_data = 1; - svc_sock_enqueue(svsk); + svsk->sk_data++; + __svc_sock_enqueue(svsk); spin_unlock_bh(&svsk->sk_lock); out: if (sk->sleep && waitqueue_active(sk->sleep)) @@ -365,15 +391,15 @@ struct svc_serv *serv = svsk->sk_server; struct sk_buff *skb; u32 *data; - int err, len; + int err, len, recv = svsk->sk_data; - svsk->sk_data = 0; - while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { - svc_sock_received(svsk, 0); - if (err == -EAGAIN) - return err; - /* possibly an icmp error */ + if (!(skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err))) { dprintk("svc: recvfrom returned error %d\n", -err); + if (err == -EAGAIN) + svc_sock_received(svsk, recv); + else + svc_sock_received(svsk, 0); + return err; } if (skb->ip_summed != CHECKSUM_UNNECESSARY) { @@ -381,36 +407,34 @@ csum = csum_partial(skb->h.raw, skb->len, csum); if ((unsigned short)csum_fold(csum)) { skb_free_datagram(svsk->sk_sk, skb); - svc_sock_received(svsk, 0); - return 0; + svc_sock_received(svsk, 1); + return -EAGAIN; } } - /* There may be more data */ - svsk->sk_data = 1; - len = skb->len - sizeof(struct udphdr); data = (u32 *) (skb->h.raw + sizeof(struct udphdr)); rqstp->rq_skbuff = skb; rqstp->rq_argbuf.base = data; rqstp->rq_argbuf.buf = data; - rqstp->rq_argbuf.len = (len >> 2); - /* rqstp->rq_resbuf = rqstp->rq_defbuf; */ + rqstp->rq_argbuf.len = len >> 2; rqstp->rq_prot = 
IPPROTO_UDP; /* Get sender address */ rqstp->rq_addr.sin_family = AF_INET; rqstp->rq_addr.sin_port = skb->h.uh->source; rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr; + rqstp->rq_name = &rqstp->rq_addr; + rqstp->rq_namelen = sizeof(rqstp->rq_addr); + rqstp->rq_secure = ntohs(rqstp->rq_name->sin_port) < 1024; if (serv->sv_stats) serv->sv_stats->netudpcnt++; /* One down, maybe more to go... */ svsk->sk_sk->stamp = skb->stamp; - svc_sock_received(svsk, 0); - + svc_sock_received(svsk, 1); return len; } @@ -418,20 +442,25 @@ svc_udp_sendto(struct svc_rqst *rqstp) { struct svc_buf *bufp = &rqstp->rq_resbuf; + unsigned int buflen; int error; /* Set up the first element of the reply iovec. * Any other iovecs that may be in use have been taken * care of by the server implementation itself. */ - /* bufp->base = bufp->area; */ - bufp->iov[0].iov_base = bufp->base; bufp->iov[0].iov_len = bufp->len << 2; - error = svc_sendto(rqstp, bufp->iov, bufp->nriov); + buflen = svc_iovlen(bufp->iov, bufp->nriov); + + error = svc_sendto(rqstp->rq_sock, + rqstp->rq_name, rqstp->rq_namelen, + bufp->iov, bufp->nriov, buflen, MSG_DONTWAIT); if (error == -ECONNREFUSED) /* ICMP error on earlier request. */ - error = svc_sendto(rqstp, bufp->iov, bufp->nriov); + error = svc_sendto(rqstp->rq_sock, + rqstp->rq_name, rqstp->rq_namelen, + bufp->iov, bufp->nriov, buflen, MSG_DONTWAIT); else if (error == -EAGAIN) /* Ignore and wait for re-xmit */ error = 0; @@ -461,18 +490,19 @@ dprintk("svc: socket %p TCP (listen) state change %d\n", sk, sk->state); - if (sk->state != TCP_ESTABLISHED) { - /* Aborted connection, SYN_RECV or whatever... 
*/ - goto out; - } if (!(svsk = (struct svc_sock *) sk->user_data)) { - printk("svc: socket %p: no user data\n", sk); + printk("svc_state_change1: socket %p: no user data\n", sk); goto out; } - spin_lock_bh(&svsk->sk_lock); - svsk->sk_conn++; - svc_sock_enqueue(svsk); - spin_unlock_bh(&svsk->sk_lock); + switch(sk->state) { + case TCP_ESTABLISHED: + spin_lock_bh(&svsk->sk_lock); + svsk->sk_conn++; + __svc_sock_enqueue(svsk); + spin_unlock_bh(&svsk->sk_lock); + break; + default: + } out: if (sk->sleep && waitqueue_active(sk->sleep)) wake_up_interruptible_all(sk->sleep); @@ -490,12 +520,21 @@ sk, sk->state, sk->user_data); if (!(svsk = (struct svc_sock *) sk->user_data)) { - printk("svc: socket %p: no user data\n", sk); + printk("svc_tcp_state_change2: socket %p: no user data\n", sk); goto out; } + spin_lock_bh(&svsk->sk_lock); - svsk->sk_close = 1; - svc_sock_enqueue(svsk); + switch(sk->state) { + case TCP_ESTABLISHED: + svsk->sk_data++; + __svc_sock_enqueue(svsk); + break; + default: + __svc_sock_close(svsk); + __svc_sock_enqueue(svsk); + break; + } spin_unlock_bh(&svsk->sk_lock); out: if (sk->sleep && waitqueue_active(sk->sleep)) @@ -505,42 +544,70 @@ static void svc_tcp_data_ready(struct sock *sk, int count) { - struct svc_sock * svsk; + struct svc_sock *svsk = (struct svc_sock *)(sk->user_data); - dprintk("svc: socket %p TCP data ready (svsk %p)\n", - sk, sk->user_data); - if (!(svsk = (struct svc_sock *)(sk->user_data))) + if (!svsk) goto out; + dprintk("svc: TCP data ready, socket %p(inet %p), count=%d, busy=%d\n", + svsk, sk, count, svsk->sk_busy); + spin_lock_bh(&svsk->sk_lock); svsk->sk_data++; - svc_sock_enqueue(svsk); + __svc_sock_enqueue(svsk); spin_unlock_bh(&svsk->sk_lock); out: if (sk->sleep && waitqueue_active(sk->sleep)) wake_up_interruptible(sk->sleep); } +static void +svc_tcp_write_space(struct sock *sk) +{ + struct svc_sock *svsk = (struct svc_sock *)(sk->user_data); + struct svc_serv *serv; + struct socket *sock = sk->socket; + unsigned int 
minfree; + + if (!svsk || !sock) + return; + + serv = svsk->sk_server; + minfree = min(sk->sndbuf, serv->sv_bufsz + SOCK_MIN_WRITE_SPACE); + if (sock_wspace(sk) < minfree) + return; + + if (test_bit(SOCK_NOSPACE, &sock->flags)) { + if (sk->sleep && waitqueue_active(sk->sleep)) { + clear_bit(SOCK_NOSPACE, &sock->flags); + wake_up_interruptible(sk->sleep); + } + } +} + /* * Accept a TCP connection */ -static void -svc_tcp_accept(struct svc_sock *svsk) +static int +svc_tcp_accept(struct svc_rqst *rqstp) { - struct sockaddr_in sin; - struct svc_serv *serv = svsk->sk_server; + struct svc_serv *serv = rqstp->rq_server; + struct svc_sock *svsk = rqstp->rq_sock; struct socket *sock = svsk->sk_sock; + struct sockaddr_in sin; + struct svc_sock *newsvsk; struct socket *newsock; struct proto_ops *ops; - struct svc_sock *newsvsk; - int err, slen; + int err, slen, recv = svsk->sk_conn; + + dprintk("svc: tcp_accept %p sock %p conn %d close %d\n", + svsk, sock, svsk->sk_conn, svsk->sk_close); - dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); if (!sock) - return; + goto fail_release; if (!(newsock = sock_alloc())) { printk(KERN_WARNING "%s: no more sockets!\n", serv->sv_name); - return; + goto fail_release; } dprintk("svc: tcp_accept %p allocated\n", newsock); @@ -548,10 +615,10 @@ newsock->ops = ops = sock->ops; if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) { - if (net_ratelimit()) + if (err != -EAGAIN && net_ratelimit()) printk(KERN_WARNING "%s: accept failed (err %d)!\n", serv->sv_name, -err); - goto failed; /* aborted connection or whatever */ + goto fail_accept; /* aborted connection or whatever */ } slen = sizeof(sin); @@ -581,23 +648,74 @@ if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0))) goto failed; + /* Cache the socket address */ + memcpy(&newsvsk->sk_addr, &sin, slen); + newsvsk->sk_addrlen = slen; + /* Precharge. Data may have arrived on the socket before we * installed the data_ready callback. 
*/ spin_lock_bh(&newsvsk->sk_lock); - newsvsk->sk_data = 1; + newsvsk->sk_data++; newsvsk->sk_temp = 1; - svc_sock_enqueue(newsvsk); + __svc_sock_enqueue(newsvsk); spin_unlock_bh(&newsvsk->sk_lock); if (serv->sv_stats) serv->sv_stats->nettcpconn++; - return; - + svc_sock_accepted(svsk, 0); + return 0; failed: + svc_sock_accepted(svsk, 0); sock_release(newsock); - return; + return 0; +fail_accept: + svc_sock_accepted(svsk, recv); + sock_release(newsock); + return 0; +fail_release: + svc_sock_accepted(svsk, 1); + return 0; +} + +/* + * Receive fragment header data from a TCP socket + */ +static int +svc_tcp_recvhdr(struct svc_sock *svsk) +{ + struct iovec iov; + unsigned int want; + int len; + + if (svsk->sk_tcplen >= 4) + return 0; + + want = 4 - svsk->sk_tcplen; + iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; + iov.iov_len = want; + len = svc_recvfrom(svsk, &iov, 1, want, MSG_DONTWAIT); + if (len < 0) + return len; + svsk->sk_tcplen += len; + if (len < want) + return -EAGAIN; + svsk->sk_reclen = ntohl(svsk->sk_reclen); + if (!(svsk->sk_reclen & 0x80000000)) { + /* FIXME: technically, a record can be fragmented, + * and non-terminal fragments will not have the top + * bit set in the fragment length header. + * But apparently no known nfs clients send fragmented + * records. 
*/ + if (net_ratelimit()) + printk(KERN_NOTICE "RPC: bad TCP reclen %08lx", + (unsigned long) svsk->sk_reclen); + return -EIO; + } + svsk->sk_reclen &= 0x7fffffff; + dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); + return 0; } /* @@ -606,121 +724,95 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) { + struct svc_serv *serv = rqstp->rq_server; struct svc_sock *svsk = rqstp->rq_sock; - struct svc_serv *serv = svsk->sk_server; struct svc_buf *bufp = &rqstp->rq_argbuf; - int len, ready, used; + struct sockaddr_in *sin = rqstp->rq_name; + int len, err, recv = svsk->sk_data; - dprintk("svc: tcp_recv %p data %d conn %d close %d\n", - svsk, svsk->sk_data, svsk->sk_conn, svsk->sk_close); + dprintk("svc: tcp_recv %p sock %p data %d close %d\n", + svsk, svsk->sk_sock, svsk->sk_data, svsk->sk_close); if (svsk->sk_close) { - svc_delete_socket(svsk); - return 0; - } - - if (svsk->sk_conn) { - svc_tcp_accept(svsk); - svc_sock_accepted(svsk); - return 0; + err = -ENOTCONN; + goto error; } - ready = svsk->sk_data; + rqstp->rq_name = &svsk->sk_addr; + rqstp->rq_namelen = svsk->sk_addrlen; + rqstp->rq_secure = 1; /* Receive data. If we haven't got the record length yet, get * the next four bytes. Otherwise try to gobble up as much as * possible up to the complete record length. 
*/ if (svsk->sk_tcplen < 4) { - unsigned long want = 4 - svsk->sk_tcplen; - struct iovec iov; - - iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; - iov.iov_len = want; - if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) + err = svc_tcp_recvhdr(svsk); + if (err < 0) + goto error; + /* Check whether we have a large enough buffer */ + if (svsk->sk_reclen > bufp->iov[0].iov_len) { + if (net_ratelimit()) + printk(KERN_ERR "svc/tcp_recv: client at %u.%u.%u.%u:%d is sending too large requests.\n", + NIPQUAD(sin->sin_addr.s_addr), + ntohs(sin->sin_port)); + err = -EIO; goto error; - svsk->sk_tcplen += len; - - svsk->sk_reclen = ntohl(svsk->sk_reclen); - if (!(svsk->sk_reclen & 0x80000000)) { - /* FIXME: technically, a record can be fragmented, - * and non-terminal fragments will not have the top - * bit set in the fragment length header. - * But apparently no known nfs clients send fragmented - * records. */ - /* FIXME: shutdown socket */ - printk(KERN_NOTICE "RPC: bad TCP reclen %08lx", - (unsigned long) svsk->sk_reclen); - return -EIO; } - svsk->sk_reclen &= 0x7fffffff; - dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); } - /* Check whether enough data is available */ len = svc_recv_available(svsk); - if (len < 0) + if ((err = len) < 0) goto error; - if (len < svsk->sk_reclen) { - /* FIXME: if sk_reclen > window-size, then we will - * never be able to receive the record, so should - * shutdown the connection - */ dprintk("svc: incomplete TCP record (%d of %d)\n", len, svsk->sk_reclen); - svc_sock_received(svsk, ready); - return -EAGAIN; /* record not complete */ + err = -EAGAIN; + goto error; } - /* if we think there is only one more record to read, but - * it is bigger than we expect, then two records must have arrived - * together, so pretend we aren't using the record.. 
*/ - if (len > svsk->sk_reclen && ready == 1) - used = 0; - else used = 1; - - /* Frob argbuf */ - bufp->iov[0].iov_base += 4; - bufp->iov[0].iov_len -= 4; /* Now receive data */ - len = svc_recvfrom(rqstp, bufp->iov, bufp->nriov, svsk->sk_reclen); - if (len < 0) + len = svc_recvfrom(svsk, bufp->iov, bufp->nriov, svsk->sk_reclen, MSG_DONTWAIT); + if ((err = len) < 0) goto error; + if (len < svsk->sk_reclen) { + printk(KERN_ERR "svc/tcp_recv: Huh? Data disappeared from beneath us...\n"); + err = -EIO; + goto error; + } + + bufp->len = len >> 2; + rqstp->rq_prot = IPPROTO_TCP; dprintk("svc: TCP complete record (%d bytes)\n", len); /* Position reply write pointer immediately after * record length */ - rqstp->rq_resbuf.buf += 1; + rqstp->rq_resbuf.buf = rqstp->rq_resbuf.base + 1; rqstp->rq_resbuf.len = 1; - rqstp->rq_skbuff = 0; - rqstp->rq_argbuf.buf += 1; - rqstp->rq_argbuf.len = (len >> 2); - rqstp->rq_prot = IPPROTO_TCP; - /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; - svc_sock_received(svsk, used); if (serv->sv_stats) serv->sv_stats->nettcpcnt++; - + svc_sock_received(svsk, 0); return len; -error: - if (len == -EAGAIN) { - dprintk("RPC: TCP recvfrom got EAGAIN\n"); - svc_sock_received(svsk, ready); /* Clear data ready */ - } else { + error: + switch(err) { + case -EAGAIN: + dprintk("svc: TCP recvfrom got EAGAIN\n"); + svc_sock_received(svsk, recv); /* Clear data ready */ + break; + default: + svc_sock_close(svsk); printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", - svsk->sk_server->sv_name, -len); + svsk->sk_server->sv_name, -err); svc_sock_received(svsk, 0); } - - return len; + return err; } /* @@ -731,26 +823,33 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) { + struct svc_sock *svsk = rqstp->rq_sock; struct svc_buf *bufp = &rqstp->rq_resbuf; - int sent; + unsigned int buflen; + int sent = -EINTR; /* Set up the first element of the reply iovec. 
* Any other iovecs that may be in use have been taken * care of by the server implementation itself. */ - bufp->iov[0].iov_base = bufp->base; bufp->iov[0].iov_len = bufp->len << 2; - bufp->base[0] = htonl(0x80000000|((bufp->len << 2) - 4)); - sent = svc_sendto(rqstp, bufp->iov, bufp->nriov); - if (sent != bufp->len<<2) { - printk(KERN_NOTICE "rpc-srv/tcp: %s: sent only %d bytes of %d - should shutdown socket\n", - rqstp->rq_sock->sk_server->sv_name, - sent, bufp->len << 2); - /* FIXME: should shutdown the socket, or allocate more memort - * or wait and try again or something. Otherwise - * client will get confused - */ + buflen = svc_iovlen(bufp->iov, bufp->nriov); + + bufp->base[0] = htonl(0x80000000|(buflen - sizeof(bufp->base[0]))); + + if (down_interruptible(&svsk->sk_sem) == 0) { + sent = svc_sendto(svsk, NULL, 0, bufp->iov, bufp->nriov, buflen, 0); + up(&svsk->sk_sem); + } + if (sent < 0) { + printk(KERN_NOTICE "rpc-srv/tcp: %s: send returned error %d - shutdown socket\n", + svsk->sk_server->sv_name, sent); + svc_sock_close(svsk); + } else if (sent < buflen) { + printk(KERN_NOTICE "rpc-srv/tcp: %s: sent only %d bytes of %d - shutdown socket\n", + svsk->sk_server->sv_name, sent, buflen); + svc_sock_close(svsk); } return sent; } @@ -760,108 +859,145 @@ { struct sock *sk = svsk->sk_sk; - svsk->sk_recvfrom = svc_tcp_recvfrom; - svsk->sk_sendto = svc_tcp_sendto; - if (sk->state == TCP_LISTEN) { dprintk("setting up TCP socket for listening\n"); sk->state_change = svc_tcp_state_change1; + sk->write_space = svc_tcp_write_space; + svsk->sk_recvfrom = svc_tcp_accept; + svsk->sk_sendto = NULL; } else { dprintk("setting up TCP socket for reading\n"); sk->state_change = svc_tcp_state_change2; sk->data_ready = svc_tcp_data_ready; - - svsk->sk_reclen = 0; - svsk->sk_tcplen = 0; + svsk->sk_recvfrom = svc_tcp_recvfrom; + svsk->sk_sendto = svc_tcp_sendto; } return 0; } +static struct svc_rqst * +svc_dequeue_request(struct svc_thread *thread) +{ + struct svc_serv *serv = 
thread->th_server; + struct svc_rqst *rqstp; + + if ((rqstp = svc_unsched_request(serv)) != NULL) { + dprintk("svc: request %p handled by daemon %p\n", rqstp, thread); + rqstp->rq_thread = thread; + } + return rqstp; +} + +static void +svc_release_request(struct svc_rqst *rqstp) +{ + struct svc_serv *serv = rqstp->rq_server; + struct svc_sock *svsk = rqstp->rq_sock; + + svc_release_skb(rqstp); + if (svsk) { + rqstp->rq_sock = NULL; + svc_sock_release(svsk); + } + if (rqstp->rq_oneshot) { + svc_delete_request(rqstp); + return; + } + /* Reinitialize the buffers */ + rqstp->rq_thread = NULL; + rqstp->rq_procinfo = NULL; + memset(&rqstp->rq_cred, 0, sizeof(rqstp->rq_cred)); + memcpy(&rqstp->rq_argbuf, &rqstp->rq_defabuf, sizeof(rqstp->rq_argbuf)); + memcpy(&rqstp->rq_resbuf, &rqstp->rq_defrbuf, sizeof(rqstp->rq_resbuf)); + rqstp->rq_xid = 0; + rqstp->rq_prog = 0; + rqstp->rq_vers = 0; + rqstp->rq_proc = 0; + rqstp->rq_prot = 0; + rqstp->rq_verfed = 0; + rqstp->rq_userset = 0; + rqstp->rq_secure = 0; + rqstp->rq_auth = 0; + rqstp->rq_processed = 0; + spin_lock_bh(&serv->sv_lock); + if ((svsk = __svc_sock_dequeue(serv)) != NULL) { + rqstp->rq_sock = svsk; + dprintk("svc: socket %p and request %p scheduled\n", svsk, rqstp); + __svc_schedule_request(serv, rqstp); + } else { + dprintk("svc: request %p returning to pool\n", rqstp); + __svc_pool_request(serv, rqstp); + } + spin_unlock_bh(&serv->sv_lock); +} + /* * Receive the next request on any socket. 
*/ -int -svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout) +struct svc_rqst * +svc_recv(struct svc_thread *thread) { + struct svc_serv *serv = thread->th_server; + struct svc_rqst *rqstp; struct svc_sock *svsk; int len; - DECLARE_WAITQUEUE(wait, current); - dprintk("svc: server %p waiting for data (to = %ld)\n", - rqstp, timeout); + dprintk("svc: daemon %p waiting for data\n", thread); - if (rqstp->rq_sock) - printk(KERN_ERR - "svc_recv: service %p, socket not NULL!\n", - rqstp); - if (waitqueue_active(&rqstp->rq_wait)) - printk(KERN_ERR - "svc_recv: service %p, wait queue active!\n", - rqstp); - -again: - /* Initialize the buffers */ - rqstp->rq_argbuf = rqstp->rq_defbuf; - rqstp->rq_resbuf = rqstp->rq_defbuf; + for (;;) { + if ((rqstp = svc_dequeue_request(thread)) != NULL) { - if (signalled()) - return -EINTR; + svsk = rqstp->rq_sock; - spin_lock_bh(&serv->sv_lock); - if ((svsk = svc_sock_dequeue(serv)) != NULL) { - rqstp->rq_sock = svsk; - svsk->sk_inuse++; - } else { - /* No data pending. Go to sleep */ - svc_serv_enqueue(serv, rqstp); - - /* - * We have to be able to interrupt this wait - * to bring down the daemons ... - */ - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&rqstp->rq_wait, &wait); - spin_unlock_bh(&serv->sv_lock); - - schedule_timeout(timeout); - - spin_lock_bh(&serv->sv_lock); - remove_wait_queue(&rqstp->rq_wait, &wait); - - if (!(svsk = rqstp->rq_sock)) { - svc_serv_dequeue(serv, rqstp); - spin_unlock_bh(&serv->sv_lock); - dprintk("svc: server %p, no data yet\n", rqstp); - return signalled()? -EINTR : -EAGAIN; + /* Garbage collect any pending timers */ + svc_sock_delete_timer(svsk); + + /* Are we just trying to resend an old request? */ + if (rqstp->rq_processed) { + svc_send(rqstp); + continue; + } + if (!rqstp->rq_sock || rqstp->rq_sock->sk_close) { + svc_release_request(rqstp); + continue; + } + break; } + + /* No data pending. 
Go to sleep */ + dprintk("svc: daemon %p, no data yet\n", thread); + return NULL; } - spin_unlock_bh(&serv->sv_lock); - dprintk("svc: server %p, socket %p, inuse=%d\n", + dprintk("svc: request %p, socket %p, inuse=%d\n", rqstp, svsk, svsk->sk_inuse); len = svsk->sk_recvfrom(rqstp); dprintk("svc: got len=%d\n", len); - /* No data, incomplete (TCP) read, or accept() */ - if (len == 0 || len == -EAGAIN) { - svc_sock_release(rqstp); - goto again; + /* + * No data, incomplete (TCP) read, or accept(). + * Nevertheless we return in order to honour the timeout. + */ + if (len <= 0) { + if (len == 0 || len == -EAGAIN) { + svc_release_request(rqstp); + return ERR_PTR(-EAGAIN); + } else { + svc_drop(rqstp); + return ERR_PTR(len); + } } - rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024; rqstp->rq_userset = 0; rqstp->rq_verfed = 0; svc_getlong(&rqstp->rq_argbuf, rqstp->rq_xid); svc_putlong(&rqstp->rq_resbuf, rqstp->rq_xid); - /* Assume that the reply consists of a single buffer. */ - rqstp->rq_resbuf.nriov = 1; - if (serv->sv_stats) serv->sv_stats->netcnt++; - return len; + return rqstp; } /* @@ -870,8 +1006,18 @@ void svc_drop(struct svc_rqst *rqstp) { + struct svc_sock *svsk = rqstp->rq_sock; + + if (!svsk) + return; + dprintk("svc: socket %p dropped request\n", rqstp->rq_sock); - svc_sock_release(rqstp); + + /* Is this a TCP connection? Kill it! 
*/ + if (svsk->sk_temp) + svc_sock_close(svsk); + + svc_release_request(rqstp); } /* @@ -893,7 +1039,7 @@ svc_release_skb(rqstp); len = svsk->sk_sendto(rqstp); - svc_sock_release(rqstp); + svc_release_request(rqstp); if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) return 0; @@ -909,7 +1055,7 @@ int *errp, int pmap_register) { struct svc_sock *svsk; - struct sock *inet; + struct sock *sk = sock->sk; dprintk("svc: svc_setup_socket %p\n", sock); if (!(svsk = kmalloc(sizeof(*svsk), GFP_KERNEL))) { @@ -917,32 +1063,38 @@ return NULL; } memset(svsk, 0, sizeof(*svsk)); + spin_lock_init(&svsk->sk_lock); + init_MUTEX(&svsk->sk_sem); - inet = sock->sk; - inet->user_data = svsk; + init_timer(&svsk->sk_timer); + svsk->sk_timer.data = (unsigned long)svsk; + svsk->sk_timer.function = (void (*)(unsigned long)) svc_sock_timeout; + sk->sndtimeo = RPCSVC_SEND_TIMEOUT; + + sk->user_data = svsk; svsk->sk_sock = sock; - svsk->sk_sk = inet; - svsk->sk_ostate = inet->state_change; - svsk->sk_odata = inet->data_ready; + svsk->sk_sk = sk; + svsk->sk_ostate = sk->state_change; + svsk->sk_odata = sk->data_ready; + svsk->sk_owspace = sk->write_space; svsk->sk_server = serv; - spin_lock_init(&svsk->sk_lock); /* Initialize the socket */ + spin_lock_bh(&svsk->sk_lock); + svsk->sk_busy = 1; if (sock->type == SOCK_DGRAM) *errp = svc_udp_init(svsk); else *errp = svc_tcp_init(svsk); -if (svsk->sk_sk == NULL) - printk(KERN_WARNING "svsk->sk_sk == NULL after svc_prot_init!\n"); + spin_unlock_bh(&svsk->sk_lock); + if (*errp < 0) + goto out_err; /* Register socket with portmapper */ - if (*errp >= 0 && pmap_register) - *errp = svc_register(serv, inet->protocol, ntohs(inet->sport)); - - if (*errp < 0) { - inet->user_data = NULL; - kfree(svsk); - return NULL; + if (pmap_register) { + *errp = svc_register(serv, sk->protocol, ntohs(sk->sport)); + if (*errp < 0) + goto out_err; } spin_lock_bh(&serv->sv_lock); @@ -950,9 +1102,24 @@ serv->sv_allsocks = svsk; spin_unlock_bh(&serv->sv_lock); + 
spin_lock_bh(&svsk->sk_lock); + svsk->sk_busy = 0; + spin_unlock_bh(&svsk->sk_lock); + dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); return svsk; + out_err: + spin_lock_bh(&svsk->sk_lock); + sk->user_data = NULL; + sk->state_change = svsk->sk_ostate; + sk->data_ready = svsk->sk_odata; + sk->write_space = svsk->sk_owspace; + __svc_sock_disable_timer(svsk); + spin_unlock_bh(&svsk->sk_lock); + svc_sock_delete_timer(svsk); + kfree(svsk); + return NULL; } /* @@ -1009,42 +1176,48 @@ svc_delete_socket(struct svc_sock *svsk) { struct svc_sock **rsk; - struct svc_serv *serv; - struct sock *sk; + struct svc_serv *serv = svsk->sk_server; + struct sock *sk = svsk->sk_sk; dprintk("svc: svc_delete_socket(%p)\n", svsk); - serv = svsk->sk_server; - sk = svsk->sk_sk; - - sk->state_change = svsk->sk_ostate; - sk->data_ready = svsk->sk_odata; + spin_lock_bh(&svsk->sk_lock); + __svc_sock_close(svsk); + svsk->sk_dead = 1; - spin_lock_bh(&serv->sv_lock); + if (svsk->sk_inuse) { + dprintk("svc: server socket destroy delayed\n"); + spin_unlock_bh(&svsk->sk_lock); + return; + } + spin_lock(&serv->sv_lock); for (rsk = &serv->sv_allsocks; *rsk; rsk = &(*rsk)->sk_list) { - if (*rsk == svsk) + if (*rsk == svsk) { + *rsk = svsk->sk_list; break; + } } - if (!*rsk) { - spin_unlock_bh(&serv->sv_lock); - return; - } - *rsk = svsk->sk_list; - if (svsk->sk_qued) + if (svsk->sk_qued) { rpc_remove_list(&serv->sv_sockets, svsk); + svsk->sk_qued = 0; + } + spin_unlock(&serv->sv_lock); - spin_unlock_bh(&serv->sv_lock); + /* Disable any pending timers in order to prevent races */ + __svc_sock_disable_timer(svsk); - svsk->sk_dead = 1; + sk->state_change = svsk->sk_ostate; + sk->data_ready = svsk->sk_odata; + sk->write_space = svsk->sk_owspace; + sk->user_data = NULL; + spin_unlock_bh(&svsk->sk_lock); - if (!svsk->sk_inuse) { - sock_release(svsk->sk_sock); - kfree(svsk); - } else { - printk(KERN_NOTICE "svc: server socket destroy delayed\n"); - /* svsk->sk_server = NULL; */ - } 
+ /* Synchronously delete any pending timers */ + svc_sock_delete_timer(svsk); + + sock_release(svsk->sk_sock); + kfree(svsk); } /*