[nfsv4] server-cluster-locking-api: cthon failure

Bryce Harrington bryce at osdl.org
Wed Jul 5 20:26:03 EDT 2006


On Tue, Jul 04, 2006 at 05:55:50PM -0400, J. Bruce Fields wrote:
> On Tue, Jul 04, 2006 at 02:45:51PM -0700, Bryce Harrington wrote:
> > Would it be helpful if I set up a couple of boxes with this kernel that
> > reproduce the error, which you can log in to and poke around on?  Or is
> > there a way I can gather the necessary info for you?
> 
> It might, if I could also get instructions on how to rebuild and
> reinstall the same kernel on those machines.  I'm not sure if I'll be
> able to get to it soon enough, though....

Good deal; I'll draw up some directions.  It's pretty straightforward,
but a little tutorial couldn't hurt.  I will probably have it written
within a day or two.
 
Bryce

> I think the first thing to try is to get a network trace showing the
> failure.
> 
> Then I think what I'd want to do next is run with something like this
> (untested) and watch the logs to see which case it's hitting.
> 
> --b.
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 6330f02..bbecaf3 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -899,10 +899,14 @@ nfsd4_setclientid_confirm(struct svc_rqs
>  	unconf = find_unconfirmed_client(clid);
>  
>  	status = nfserr_clid_inuse;
> -	if (conf && conf->cl_addr != ip_addr)
> +	if (conf && conf->cl_addr != ip_addr) {
> +		printk("clid_inuse: bad conf ip\n");
>  		goto out;
> -	if (unconf && unconf->cl_addr != ip_addr)
> +	}
> +	if (unconf && unconf->cl_addr != ip_addr) {
> +		printk("clid_inuse_ bad unconf ip\n");
>  		goto out;
> +	}
>  
>  	if ((conf && unconf) && 
>  	    (cmp_verf(&unconf->cl_confirm, &confirm)) &&
> @@ -914,9 +918,10 @@ nfsd4_setclientid_confirm(struct svc_rqs
>  		* conf record that matches input clientid.
>  		* conf and unconf records match names, verifiers
>  		*/
> -		if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) 
> +		if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) {
> +			printk("clid_inuse: case 1\n");
>  			status = nfserr_clid_inuse;
> -		else {
> +		} else {
>  			/* XXX: We just turn off callbacks until we can handle
>  			  * change request correctly. */
>  			atomic_set(&conf->cl_callback.cb_set, 0);
> @@ -936,9 +941,10 @@ nfsd4_setclientid_confirm(struct svc_rqs
>  		 * unconf->cl_name or unconf->cl_verifier don't match the
>  		 * conf record.
>  		 */
> -		if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred))
> +		if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
> +			printk("clid_inuse: case 2\n");
>  			status = nfserr_clid_inuse;
> -		else
> +		} else
>  			status = nfs_ok;
>  	} else if (!conf && unconf
>  			&& cmp_verf(&unconf->cl_confirm, &confirm)) {
> @@ -948,6 +954,7 @@ nfsd4_setclientid_confirm(struct svc_rqs
>  		 * unconf->cl_confirm matches input confirm
>  		 */
>  		if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
> +			printk("clid_inuse: case 3\n");
>  			status = nfserr_clid_inuse;
>  		} else {
>  			unsigned int hash =
> @@ -974,6 +981,7 @@ nfsd4_setclientid_confirm(struct svc_rqs
>  		status = nfserr_stale_clientid;
>  	} else {
>  		/* check that we have hit one of the cases...*/
> +		printk("clid_inuse: mystery case!\n");
>  		status = nfserr_clid_inuse;
>  	}
>  out:


More information about the NFSv4 mailing list