[pnfs] [PATCH 1/6] pnfs: fix commit lseg reference counting

Andy Adamson andros at netapp.com
Mon Jul 28 09:41:08 EDT 2008


On Sun, 2008-07-27 at 19:48 +0300, Benny Halevy wrote:
> On Wed, 23 Jul 2008 15:05:09 -0400 andros at netapp.com wrote:
> > From: Andy Adamson <andros at localhost.localdomain>
> > 
> > This fixes a bug where destroy_lseg was not called at umount for the
> > filelayout driver.
> > 
> > The commit lseg kref is incremented by pnfs_has_layout in pnfs_update_layout
> > called in the COMMIT path. Refactor nfs_initiate_commit to match the read and
> 
> The refactoring part better be done as a separate, preliminary patch.

OK.

> 
> > write code by adding an rpc_call_ops parameter. This allows the filelayout
> > driver (as the non-RPC drivers do) to call pnfs_commit_done->pnfs_call_done->
> > put_lseg, and then call the nfs_commit_ops rpc_call_done and rpc_release
> > routines.
> > 
> > An lseg reference is already taken for a COMMIT to the MDS, or a COMMIT to the
> > first data server.  Add a kref_get on the commit lseg for the remaining data
> > servers.
> > 
> > Signed-off-by: Andy Adamson<andros at netapp.com>
> > ---
> >  fs/nfs/internal.h       |    4 +++-
> >  fs/nfs/nfs4filelayout.c |   24 ++++++++++++++++++++++--
> >  fs/nfs/nfs4proc.c       |    2 +-
> >  fs/nfs/pnfs.c           |    3 ++-
> >  fs/nfs/write.c          |    6 ++++--
> >  5 files changed, 32 insertions(+), 7 deletions(-)
> > 
> > diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> > index 1db3d4b..f6308d1 100644
> > --- a/fs/nfs/internal.h
> > +++ b/fs/nfs/internal.h
> > @@ -212,7 +212,9 @@ extern int nfs_initiate_write(struct nfs_write_data *data,
> >  			      int how);
> >  extern int nfs_write_validate(struct rpc_task *task, void *calldata);
> >  extern int nfs_initiate_commit(struct nfs_write_data *data,
> > -			       struct rpc_clnt *clnt, int how);
> > +			       struct rpc_clnt *clnt,
> > +			       const struct rpc_call_ops *call_ops,
> > +			       int how);
> >  extern int nfs_flush_one(struct inode *inode, struct list_head *head,
> >  			 unsigned int npages, size_t count, int how);
> >  
> > diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> > index 7ed06ac..6d0ae53 100644
> > --- a/fs/nfs/nfs4filelayout.c
> > +++ b/fs/nfs/nfs4filelayout.c
> > @@ -558,12 +558,25 @@ filelayout_clone_write_data(struct nfs_write_data *old)
> >  	nfs_fattr_init(&new->fattr);
> >  	new->res.verf    = &new->verf;
> >  	new->args.context = old->args.context;
> > +	new->pdata.lseg = old->pdata.lseg;
> >  	new->pdata.call_ops = old->pdata.call_ops;
> >  	new->pdata.how = old->pdata.how;
> >  out:
> >  	return new;
> >  }
> >  
> > +static void filelayout_commit_call_done(struct rpc_task *task, void *data)
> > +{
> > +	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
> > +
> > +	pnfs_callback_ops->nfs_commit_complete(wdata);
> > +}
> > +
> > +struct rpc_call_ops filelayout_commit_call_ops = {
> > +	.rpc_call_validate_args = nfs_write_validate,
> > +	.rpc_call_done = filelayout_commit_call_done,
> > +};
> > +
> >  /*
> >   * Execute a COMMIT op to the MDS or to each data server on which a page
> >   * in 'pages' exists.
> > @@ -630,8 +643,14 @@ filelayout_commit(struct pnfs_layout_type *layoutid, int sync,
> >  			data->pdata.pnfs_error = -ENOMEM;
> >  			goto out;
> >  		}
> > -		if (count != 0)
> > +		if (count != 0) {
> > +			/* The open context and layout segment have
> > +			 * have been referenced for the call to the first
> > +			 * data server. Do the same for any other data servers.
> > +			 */
> >  			atomic_inc(&dsdata->args.context->count);
> > +			kref_get(&data->pdata.lseg->kref);
> 
> This is a tad too late, isn't it?

I think it's right, in exactly the same way that incrementing the
context count is right. 

> nfs_initiate_commit was already called for the first iteration and it might
> have already dereferenced lseg.

Sure, it would dereference the lseg, after referencing it. The first
iteration is exactly like commiting to the MDS. It has the lseg
referenced in pnfs_update_layout, and dereferenced in pnfs_call_done.
The subsequent data server commits (e.g. count > 0) have the lseg
referenced here, and dereferenced in pnfs_call_done.

>  There is a reference taken in pnfs_update_layout
> We can get the extra reference at this point if !list_empty(head),

It's not an extra reference. It's the reference for the call generated
by !list_empty(head) and dereferenced in pnfs_call_done. If we remove
this kref, then we have an extra dereference. 

> i.e. there's more work to do.


> 
> > +		}
> >  
> >  		/* Just try the first multipath data server */
> >  		ds = dserver.dev->ds_list[0];
> > @@ -661,7 +680,8 @@ filelayout_commit(struct pnfs_layout_type *layoutid, int sync,
> >  		print_ds(ds);
> >  
> >  		/* Send COMMIT to data server */
> > -		nfs_initiate_commit(dsdata, dsdata->fldata.pnfs_client, sync);
> > +		nfs_initiate_commit(dsdata, dsdata->fldata.pnfs_client,
> > +				    &filelayout_commit_call_ops, sync);
> >  		count++;
> >  	}
> >  
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index 540a3b8..379f98c 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -3067,7 +3067,7 @@ static int pnfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
> >  }
> >  
> >  /*
> > - * rpc_call_done callback for a write to the MDS or to a filelayout Data Server
> > + * rpc_call_done callback for a commit to the MDS or to a filelayout Data Server
> 
> It'll really help if the cosmetic changes are separated out
> into a different patch.

OK

> 
> >   */
> >  static int pnfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
> >  {
> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> > index 21c5c8d..47e00ea 100644
> > --- a/fs/nfs/pnfs.c
> > +++ b/fs/nfs/pnfs.c
> > @@ -392,6 +392,7 @@ destroy_lseg(struct kref *kref)
> >  	struct pnfs_layout_segment *lseg =
> >  		container_of(kref, struct pnfs_layout_segment, kref);
> >  
> > +	dprintk("--> %s\n", __func__);
> 
> ditto

OK

> 
> >  	PNFS_LD_IO_OPS(lseg->layout)->free_lseg(lseg);
> >  }
> >  
> > @@ -1738,7 +1739,7 @@ _pnfs_try_to_commit(struct nfs_write_data *data,
> >  	}
> >  }
> >  
> > -/* pNFS Commit callback function for non-file layout drivers */
> > +/* pNFS Commit callback function */
> 
> ditto

This comment reflects the change in code, and should accompany it, not
be in a separate patch. It will be in the refactoring patch.

> 
> >  static void
> >  pnfs_commit_done(struct nfs_write_data *data)
> >  {
> > diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> > index 71a9dcc..b62aee7 100644
> > --- a/fs/nfs/write.c
> > +++ b/fs/nfs/write.c
> > @@ -1245,6 +1245,7 @@ void nfs_commitdata_release(void *data)
> >  
> >  int nfs_initiate_commit(struct nfs_write_data *data,
> >  			struct rpc_clnt *clnt,
> > +			const struct rpc_call_ops *call_ops,
> >  			int how)
> >  {
> >  	struct inode *inode = data->inode;
> > @@ -1260,7 +1261,7 @@ int nfs_initiate_commit(struct nfs_write_data *data,
> >  		.task = &data->task,
> >  		.rpc_client = clnt,
> >  		.rpc_message = &msg,
> > -		.callback_ops = &nfs_commit_ops,
> > +		.callback_ops = call_ops,
> >  		.callback_data = data,
> >  		.workqueue = nfsiod_workqueue,
> >  		.flags = flags,
> > @@ -1315,7 +1316,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
> >  	if (ret == PNFS_ATTEMPTED)
> >  		return pnfs_get_write_status(data);
> >  
> > -	return nfs_initiate_commit(data, NFS_CLIENT(inode), how);
> > +	return nfs_initiate_commit(data, NFS_CLIENT(inode), &nfs_commit_ops,
> > +				   how);
> >  }
> >  
> >  /*


More information about the pNFS mailing list