[PATCH 0/7] Permit filesystem local caching and NFS superblock
sharing [try #13]
Bryce Harrington
bryce at osdl.org
Thu Sep 7 14:18:05 EDT 2006
With this patch, the fs_cache patchset works very well in our test
harness.
http://crucible.osdl.org/runs/branches.html
Run 1938 in the above chart is the one with trond's patch, and as you
can see, all tests pass OK. Pynfs, connectathon, and LTP are all
returning results within expectation, and iozone is returning
performance comparable to the run against trond's tree alone:
http://crucible.osdl.org/runs/1747/test_output/iozone.sys.log.png
http://crucible.osdl.org/runs/1938/test_output/iozone.sys.log.png
Actually it appears the run with the fs_cache patch (1938) performed
slightly better than the run without (1747), however I wouldn't count
this as conclusive without further testing, as it could simply be
noise.
Bryce
P.S., For the record, here are the patches we're using for run 1938:
http://crucible.osdl.org/packages/nfsv4/fs-cache-1.patch
http://crucible.osdl.org/packages/nfsv4/fs-cache-2.patch
http://crucible.osdl.org/packages/nfsv4/fs-cache-3.patch
http://crucible.osdl.org/packages/nfsv4/fs-cache-4.patch
http://crucible.osdl.org/packages/nfsv4/fs-cache-5.patch
http://crucible.osdl.org/packages/nfsv4/fs-cache-6.patch
http://crucible.osdl.org/packages/nfsv4/fs-cache-7.patch
http://crucible.osdl.org/packages/nfsv4/trond-clnt-2.patch
http://crucible.osdl.org/packages/nfsv4/trond-clnt-3.patch
On Wed, Sep 06, 2006 at 01:03:33PM -0400, Trond Myklebust wrote:
> On Sun, 2006-09-03 at 01:13 -0400, Trond Myklebust wrote:
> > On Fri, 2006-09-01 at 16:32 -0700, Bryce Harrington wrote:
> > > Last night we re-ran the test, and it did Oops again, in a similar
> > > manner, but this time when running newpynfs with AUTHSYS:
> > >
> > > http://crucible.osdl.org/runs/1750/sysinfo/nfs06.console
> > >
> > > BUG: unable to handle kernel paging request at virtual address 6b6b6b73
> > > printing eip:
> > > c040f26d
> > > *pde = 00000000
> > > Oops: 0000 [#1]
> > > PREEMPT SMP
> > > Modules linked in:
> > > CPU: 0
> > > EIP: 0060:[<c040f26d>] Not tainted VLI
> > > EFLAGS: 00010246 (2.6.18-rc5-gd4a4640-nfs-client-stable #1)
> > > EIP is at pmap_getport_done+0x17/0xae
> > > eax: 6b6b6b6b ebx: dfc56370 ecx: 00000000 edx: dfc56370
> > > esi: dfc562b0 edi: 00000000 ebp: f7aa2f04 esp: f7aa2ef8
> > > ds: 007b es: 007b ss: 0068
> > > Process rpciod/0 (pid: 7635, ti=f7aa2000 task=f7421550 task.ti=f7aa2000)
> > > Stack: dfc56370 dfc563d8 00000000 f7aa2f18 c040a23f dfc56370 dfc4aa34
> > > dfc56370
> > > f7aa2f30 c040a2f7 dfc56370 dfc563e0 dfc563e4 f6b07d24 f7aa2f3c c040a440
> > > dfc56370 f7aa2f60 c012b398 dfc56370 dfc56370 c040a435 00000213 f6b07d24
> > > Call Trace:
> > > [<c01037da>] show_stack_log_lvl+0x8a/0x92
> > > [<c010393b>] show_registers+0x11d/0x186
> > > [<c0103b27>] die+0x10c/0x1c2
> > > [<c0113f31>] do_page_fault+0x3e0/0x4bc
> > > [<c01034ad>] error_code+0x39/0x40
> > > [<c040a23f>] rpc_exit_task+0x1e/0x59
> > > [<c040a2f7>] __rpc_execute+0x7d/0x19c
> > > [<c040a440>] rpc_async_schedule+0xb/0xd
> > > [<c012b398>] run_workqueue+0x85/0xc7
> > > [<c012b4d7>] worker_thread+0xfd/0x131
> > > [<c012e3d2>] kthread+0x84/0xad
> > > [<c0100f19>] kernel_thread_helper+0x5/0xb
> > > Code: 05 c0 00 00 00 50 e8 d8 ae ff ff 5a 8d 65 f4 5b 5e 5f 5d c3 55 89
> > > e5 57 56 8b 45 0c 8b 55 08 53 8b 70 10 8b 7a 18 8b 46 10 85 ff <8b> 58
> > > 08 79 0d 8b 03 6a 00 53 ff 50 14 89 7e 18 eb 1b 8b 55 0c
> > > EIP: [<c040f26d>] pmap_getport_done+0x17/0xae SS:ESP 0068:f7aa2ef8
> >
> > Yep. The portmapper code really should not assume that the task that
> > spawned it will still be around when it finishes. I should have a patch
> > for that by tomorrow.
> >
> > Cheers,
> > Trond
>
> <cough>better late than never</cough>...
>
> Cheers,
> Trond
> Date: Tue, 5 Sep 2006 12:55:57 -0400
> From: Trond Myklebust <Trond.Myklebust at netapp.com>
> Subject: No Subject
>
> In a subsequent patch, this will allow the portmapper to take a reference
> to the rpc_xprt for which it is updating the port number, fixing an Oops.
>
> Signed-off-by: Trond Myklebust <Trond.Myklebust at netapp.com>
> ---
>
> include/linux/sunrpc/xprt.h | 5 ++++-
> net/sunrpc/clnt.c | 8 +++-----
> net/sunrpc/xprt.c | 28 +++++++++++++++++++++++++---
> 3 files changed, 32 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
> index de4efea..bdeba85 100644
> --- a/include/linux/sunrpc/xprt.h
> +++ b/include/linux/sunrpc/xprt.h
> @@ -12,6 +12,7 @@ #define _LINUX_SUNRPC_XPRT_H
> #include <linux/uio.h>
> #include <linux/socket.h>
> #include <linux/in.h>
> +#include <linux/kref.h>
> #include <linux/sunrpc/sched.h>
> #include <linux/sunrpc/xdr.h>
>
> @@ -129,6 +130,7 @@ struct rpc_xprt_ops {
> };
>
> struct rpc_xprt {
> + struct kref kref; /* Reference count */
> struct rpc_xprt_ops * ops; /* transport methods */
> struct socket * sock; /* BSD socket layer */
> struct sock * inet; /* INET layer */
> @@ -248,7 +250,8 @@ int xprt_adjust_timeout(struct rpc_rqs
> void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
> void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
> void xprt_release(struct rpc_task *task);
> -int xprt_destroy(struct rpc_xprt *xprt);
> +struct rpc_xprt * xprt_get(struct rpc_xprt *xprt);
> +void xprt_put(struct rpc_xprt *xprt);
>
> static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p)
> {
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index ceadb72..084a0ad 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -177,7 +177,7 @@ out_no_path:
> kfree(clnt->cl_server);
> kfree(clnt);
> out_err:
> - xprt_destroy(xprt);
> + xprt_put(xprt);
> out_no_xprt:
> return ERR_PTR(err);
> }
> @@ -261,6 +261,7 @@ rpc_clone_client(struct rpc_clnt *clnt)
> atomic_set(&new->cl_users, 0);
> new->cl_parent = clnt;
> atomic_inc(&clnt->cl_count);
> + new->cl_xprt = xprt_get(clnt->cl_xprt);
> /* Turn off autobind on clones */
> new->cl_autobind = 0;
> new->cl_oneshot = 0;
> @@ -337,15 +338,12 @@ rpc_destroy_client(struct rpc_clnt *clnt
> rpc_rmdir(clnt->cl_dentry);
> rpc_put_mount();
> }
> - if (clnt->cl_xprt) {
> - xprt_destroy(clnt->cl_xprt);
> - clnt->cl_xprt = NULL;
> - }
> if (clnt->cl_server != clnt->cl_inline_name)
> kfree(clnt->cl_server);
> out_free:
> rpc_free_iostats(clnt->cl_metrics);
> clnt->cl_metrics = NULL;
> + xprt_put(clnt->cl_xprt);
> kfree(clnt);
> return 0;
> }
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index a85f82b..1f786f6 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -926,6 +926,7 @@ struct rpc_xprt *xprt_create_transport(i
> return ERR_PTR(result);
> }
>
> + kref_init(&xprt->kref);
> spin_lock_init(&xprt->transport_lock);
> spin_lock_init(&xprt->reserve_lock);
>
> @@ -958,16 +959,37 @@ struct rpc_xprt *xprt_create_transport(i
>
> /**
> * xprt_destroy - destroy an RPC transport, killing off all requests.
> - * @xprt: transport to destroy
> + * @kref: kref for the transport to destroy
> *
> */
> -int xprt_destroy(struct rpc_xprt *xprt)
> +static void xprt_destroy(struct kref *kref)
> {
> + struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref);
> +
> dprintk("RPC: destroying transport %p\n", xprt);
> xprt->shutdown = 1;
> del_timer_sync(&xprt->timer);
> xprt->ops->destroy(xprt);
> kfree(xprt);
> +}
>
> - return 0;
> +/**
> + * xprt_put - release a reference to an RPC transport.
> + * @xprt: pointer to the transport
> + *
> + */
> +void xprt_put(struct rpc_xprt *xprt)
> +{
> + kref_put(&xprt->kref, xprt_destroy);
> +}
> +
> +/**
> + * xprt_get - return a reference to an RPC transport.
> + * @xprt: pointer to the transport
> + *
> + */
> +struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
> +{
> + kref_get(&xprt->kref);
> + return xprt;
> }
> Date: Sun, 3 Sep 2006 00:51:55 -0400
> From: Trond Myklebust <Trond.Myklebust at netapp.com>
> Subject: No Subject
>
> There is no guarantee that the parent task still exists when we exit from
> the portmapper. Save the xprt instead.
>
> Signed-off-by: Trond Myklebust <Trond.Myklebust at netapp.com>
> ---
>
> net/sunrpc/pmap_clnt.c | 46 ++++++++++++++++++++++------------------------
> 1 files changed, 22 insertions(+), 24 deletions(-)
>
> diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
> index f476f4d..c04609d 100644
> --- a/net/sunrpc/pmap_clnt.c
> +++ b/net/sunrpc/pmap_clnt.c
> @@ -30,7 +30,7 @@ struct portmap_args {
> u32 pm_vers;
> u32 pm_prot;
> unsigned short pm_port;
> - struct rpc_task * pm_task;
> + struct rpc_xprt * pm_xprt;
> };
>
> static struct rpc_procinfo pmap_procedures[];
> @@ -71,10 +71,10 @@ static const struct rpc_call_ops pmap_ge
> .rpc_release = pmap_map_release,
> };
>
> -static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt)
> +static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt, int status)
> {
> xprt_clear_binding(xprt);
> - rpc_wake_up(&xprt->binding);
> + rpc_wake_up_status(&xprt->binding, status);
> }
>
> /**
> @@ -92,6 +92,7 @@ void rpc_getport(struct rpc_task *task)
> struct portmap_args *map;
> struct rpc_clnt *pmap_clnt;
> struct rpc_task *child;
> + int status;
>
> dprintk("RPC: %4d rpc_getport(%s, %u, %u, %d)\n",
> task->tk_pid, clnt->cl_server,
> @@ -107,34 +108,30 @@ void rpc_getport(struct rpc_task *task)
> }
>
> /* Someone else may have bound if we slept */
> - if (xprt_bound(xprt)) {
> - task->tk_status = 0;
> + status = 0;
> + if (xprt_bound(xprt))
> goto bailout_nofree;
> - }
>
> + status = -ENOMEM;
> map = pmap_map_alloc();
> - if (!map) {
> - task->tk_status = -ENOMEM;
> + if (!map)
> goto bailout_nofree;
> - }
> map->pm_prog = clnt->cl_prog;
> map->pm_vers = clnt->cl_vers;
> map->pm_prot = xprt->prot;
> map->pm_port = 0;
> - map->pm_task = task;
> + map->pm_xprt = xprt_get(xprt);
>
> rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr));
> pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0);
> - if (IS_ERR(pmap_clnt)) {
> - task->tk_status = PTR_ERR(pmap_clnt);
> + status = PTR_ERR(pmap_clnt);
> + if (IS_ERR(pmap_clnt))
> goto bailout;
> - }
>
> + status = -EIO;
> child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map);
> - if (IS_ERR(child)) {
> - task->tk_status = -EIO;
> + if (IS_ERR(child))
> goto bailout;
> - }
> rpc_release_task(child);
>
> rpc_sleep_on(&xprt->binding, task, NULL, NULL);
> @@ -144,8 +141,10 @@ void rpc_getport(struct rpc_task *task)
>
> bailout:
> pmap_map_free(map);
> + xprt_put(xprt);
> bailout_nofree:
> - pmap_wake_portmap_waiters(xprt);
> + task->tk_status = status;
> + pmap_wake_portmap_waiters(xprt, status);
> }
>
> #ifdef CONFIG_ROOT_NFS
> @@ -201,29 +200,28 @@ #endif
> static void pmap_getport_done(struct rpc_task *child, void *data)
> {
> struct portmap_args *map = data;
> - struct rpc_task *task = map->pm_task;
> - struct rpc_xprt *xprt = task->tk_xprt;
> + struct rpc_xprt *xprt = map->pm_xprt;
> int status = child->tk_status;
>
> if (status < 0) {
> /* Portmapper not available */
> xprt->ops->set_port(xprt, 0);
> - task->tk_status = status;
> } else if (map->pm_port == 0) {
> /* Requested RPC service wasn't registered */
> xprt->ops->set_port(xprt, 0);
> - task->tk_status = -EACCES;
> + status = -EACCES;
> } else {
> /* Succeeded */
> xprt->ops->set_port(xprt, map->pm_port);
> xprt_set_bound(xprt);
> - task->tk_status = 0;
> + status = 0;
> }
>
> dprintk("RPC: %4d pmap_getport_done(status %d, port %u)\n",
> - child->tk_pid, child->tk_status, map->pm_port);
> + child->tk_pid, status, map->pm_port);
>
> - pmap_wake_portmap_waiters(xprt);
> + pmap_wake_portmap_waiters(xprt, status);
> + xprt_put(xprt);
> }
>
> /**
More information about the NFSv4
mailing list